Udemy Data Science & Machine Learning Bootcamp

</mark></center>

In [1]:
# For displaying the figures:
from IPython import display 

**1) NumPy:-**

NumPy is a Linear Algebra library for Data Science in Python. The NumPy Arrays essentially come in two falvors: i) 1-D Vectors & ii) 2-D Matrices. The NumPy arrays come without any commas.
In [1]:
import numpy as np

1.1) NumPy Arrays

In [5]:
# From Python 1-D List to a 1-D NumPy Array:
l1 = [1, 2, 3]
arr_1d = np.array(l1)

print(l1)
print(arr_1d)

print()
# From Python 2-D List to a 2-D NumPy Array:
l2 = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
arr_2d = np.array(l2)

print(l2)
print(arr_2d)
[1, 2, 3]
[1 2 3]

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]
[[1 2 3]
 [4 5 6]
 [7 8 9]]
In [9]:
# A range of numbers in a Python List Vs a NumPy Array:
lr = list(range(0, 11, 2))
nr = np.arange(0, 11, 2)

print(lr)
print(nr)
[0, 2, 4, 6, 8, 10]
[ 0  2  4  6  8 10]
In [10]:
# Generating a NumPy array with all ones or zeros:
print(np.ones(3))
print(np.ones((3, 3)))     # Notice the dimensions as a Tuple
print(np.zeros(3))
print(np.zeros((3, 3)))
[1. 1. 1.]
[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]
[0. 0. 0.]
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
In [25]:
# Evenly separated numbers in a NumPy Array:
al = np.linspace(0, 20, 5)
an = np.array(al)

print(al)
print(type(al))
print(an)
print(type(an))
[ 0.  5. 10. 15. 20.]
<class 'numpy.ndarray'>
[ 0.  5. 10. 15. 20.]
<class 'numpy.ndarray'>
In [26]:
# A Singular matrix in NumPy:
np.eye(3)
Out[26]:
array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])
In [44]:
# Random Numbers Matrices using NumPy Arrays:
arn1 = np.random.rand(5)
arn2 = np.random.rand(2, 3)
arn3 = np.random.rand(1, 3, 2)

print(arn1)
print(arn2)
print(arn3)
[0.3948646  0.84443839 0.74732138 0.52884184 0.95971806]
[[0.35602274 0.0588952  0.96454993]
 [0.33374081 0.12244598 0.38442935]]
[[[0.39025306 0.98664241]
  [0.37202628 0.71334309]
  [0.12202268 0.97845597]]]
In [40]:
# Random Integer within a certain range:
rn1 = np.random.randint(15)
rn2 = np.random.randint(2, 10)
rn3 = np.random.randint(2, 50, 10)

print(rn1)
print(rn2)
print(rn3)
7
3
[31 11 34 30 16  6 34 11  8 28]
In [48]:
# Reshape a 1-D Array to a 2-D Matrix:
arr1 = np.arange(9)
arr2 = arr1.reshape(3, 3)

print(arr1)
print(arr2)
[0 1 2 3 4 5 6 7 8]
[[0 1 2]
 [3 4 5]
 [6 7 8]]
In [51]:
# Get the Min, Max values and their Index locations from an NumPy Array:
arn1 = np.random.rand(5)
arn2 = np.random.rand(2, 3)

print(arn1)
print(arn2)
print()
print(arn1.min())
print(arn1.argmin())
print()
print(arn2.min())
print(arn2.argmax())
[0.36237514 0.70198179 0.19938975 0.83591727 0.02504639]
[[0.30551424 0.4584796  0.90028695]
 [0.89891182 0.17907087 0.35978608]]

0.025046393210084616
4

0.1790708679592492
2
In [53]:
# Get the shape of a NumPy Array:
arn1 = np.random.rand(5)
arn2 = np.random.rand(2, 3)

print(arn1)
print(arn2)
print()
print(arn1.shape)
print(arn2.shape)
[0.14172492 0.63153177 0.36530867 0.13500249 0.41210385]
[[0.66726892 0.98494132 0.03216209]
 [0.57237246 0.19943968 0.66173158]]

(5,)
(2, 3)
In [56]:
# To get the data type of elements in a NumPy Array:
arn1 = np.random.rand(2, 3)
arr1 = np.arange(9)
arr2 = arr1.reshape(3, 3)

print(arr1)
print(arr2)

print(arn1.dtype)
float64
In [76]:
# Array Indexing and Slicing:
ar1 = np.arange(9)
ar2 = ar1.reshape(3, 3)

print(ar1)
print(ar2)

print()
print(ar1[2])
print(ar1[3:8])

print(ar2[0][1])
print(ar2[2][2])

print()
print(ar2[:2, 1:])
print(ar2[1, :])
print(ar2[:, 1])
[0 1 2 3 4 5 6 7 8]
[[0 1 2]
 [3 4 5]
 [6 7 8]]

2
[3 4 5 6 7]
1
8

[[1 2]
 [4 5]]
[3 4 5]
[1 4 7]
In [91]:
# Conditional Seletctions:
ar = np.arange(1, 11)
print(ar)

bool_ar = ar > 4
print(bool_ar)

arc = ar[bool_ar]
print(arc)
print(ar[ar >= 6])
[ 1  2  3  4  5  6  7  8  9 10]
[False False False False  True  True  True  True  True  True]
[ 5  6  7  8  9 10]
[ 6  7  8  9 10]

1.2) NumPy Operations

In [95]:
# Adding and Subtracting two or more NumPy Arrays elementwise:
ar1 = np.arange(1, 6)
ar2 = np.arange(6, 11)

print(ar1)
print(ar2)

ara = ar1 + ar2
ars = ar1 - ar2
print(ara)
print(ars)
[1 2 3 4 5]
[ 6  7  8  9 10]
[ 7  9 11 13 15]
[-5 -5 -5 -5 -5]
In [99]:
# A single element gets added/subtracted from an entire NumPy array:
arr = np.arange(21, 31)

print(arr)
print(arr-10)
print(arr+5)
[21 22 23 24 25 26 27 28 29 30]
[11 12 13 14 15 16 17 18 19 20]
[26 27 28 29 30 31 32 33 34 35]
In [103]:
# Universal NumPy Array functions for Mathematical operations:
arr = np.arange(1, 11)

print(np.sqrt(arr))
print(np.exp(arr))
print(np.log(arr))
[1.         1.41421356 1.73205081 2.         2.23606798 2.44948974
 2.64575131 2.82842712 3.         3.16227766]
[2.71828183e+00 7.38905610e+00 2.00855369e+01 5.45981500e+01
 1.48413159e+02 4.03428793e+02 1.09663316e+03 2.98095799e+03
 8.10308393e+03 2.20264658e+04]
[0.         0.69314718 1.09861229 1.38629436 1.60943791 1.79175947
 1.94591015 2.07944154 2.19722458 2.30258509]

**2) Pandas:-**

Pandas is a library for Data cleaning, analysis in Python. The main kind of data in Pandas are i) Series and ii) DataFrame.
In [2]:
import pandas as pd

2.1) Pandas Series

Series can be indexed with labels. The syntax is pd.Series(data, labels)
In [6]:
# Converting a List, NumPy array, Dictionary to a Pandas Series:

labels = ['a','b','c']       # Labels
my_list = [10,20,30]         # List
np_arr = np.array([10,20,30])   # NumPy Array
d = {'a':10,'b':20,'c':30}   # Dictionary

# List to Series
s1 = pd.Series(my_list)

# List to Series with index
s2 = pd.Series(my_list, labels)

# NumPy array to Series with index
s3 = pd.Series(np_arr, labels)

# Dictionary to Series- Keys as index labels
s4 = pd.Series(d)

print(s1, '\n***')
print(s2, '\n***')
print(s3, '\n***')
print(s4, '\n***')
0    10
1    20
2    30
dtype: int64 
***
a    10
b    20
c    30
dtype: int64 
***
a    10
b    20
c    30
dtype: int32 
***
a    10
b    20
c    30
dtype: int64 
***
In [17]:
# Using Series Index to access data points:

# First Subject
students_marks_ce1010 = pd.Series([27, 23, 19, 29, 30], ['John', 'Tim', 'Steve', 'Mike', 'Kevin'])
# Second Subject
students_marks_ce5410 = pd.Series([23, 25, 22, 24, 30], ['John', 'Tim', 'Steve', 'Mike', 'Kevin'])

print(students_marks_ce1010, '\n')
print(students_marks_ce5410, '\n')

# Accessing Scores
print(students_marks_ce1010['Tim'],'\n')

# Merging Series based off Index
total_marks = students_marks_ce1010 + students_marks_ce5410
print('Total Marks from both the Courses:\n',total_marks)
John     27
Tim      23
Steve    19
Mike     29
Kevin    30
dtype: int64 

John     23
Tim      25
Steve    22
Mike     24
Kevin    30
dtype: int64 

23 

Total Marks from both the Courses:
 John     50
Tim      48
Steve    41
Mike     53
Kevin    60
dtype: int64

2.2) Pandas DataFrames

In [3]:
from numpy.random import randn
np.random.seed(101)
The DataFrame is a bunch of Series objects put together to share the same index. The syntax for the Pandas DataFrames goes as: pd.DataFrame(data, row_labels, column_labels )
In [122]:
# Creating a DataFrame:
df = pd.DataFrame(randn(5,4), index='A B C D E'.split(), columns='W X Y Z'.split())
df
Out[122]:
W X Y Z
A -1.409126 0.870907 1.585812 0.929658
B -0.550876 0.942045 -0.975349 -1.054851
C 2.165421 -1.267240 -0.802843 -0.253479
D 0.090699 1.321715 0.794129 1.359712
E 0.771970 0.294217 -1.391519 0.147486

2.2.1) Accessing and Removing data:

In [123]:
# Single column
print(df['W'])
print(type(df['W']), '\n')

# Multiple columns
print(df[['W', 'X']], '\n')

# Removing rows and columns (use axis=1)
df1 = df.drop('E')
df2 = df.drop('Z', axis=1)
print(df.drop('E'), '\n')
print(df.drop('Z', axis=1), '\n')
print('df1: \n',df1, '\n')
print('df2: \n',df2, '\n')


print('df: \n',df)
df.drop('Z', axis=1, inplace=True)
print('Updated df: \n',df)

# Adding new Columns
df['Z1'] = df['W'] + df['Y']
df
A   -1.409126
B   -0.550876
C    2.165421
D    0.090699
E    0.771970
Name: W, dtype: float64
<class 'pandas.core.series.Series'> 

          W         X
A -1.409126  0.870907
B -0.550876  0.942045
C  2.165421 -1.267240
D  0.090699  1.321715
E  0.771970  0.294217 

          W         X         Y         Z
A -1.409126  0.870907  1.585812  0.929658
B -0.550876  0.942045 -0.975349 -1.054851
C  2.165421 -1.267240 -0.802843 -0.253479
D  0.090699  1.321715  0.794129  1.359712 

          W         X         Y
A -1.409126  0.870907  1.585812
B -0.550876  0.942045 -0.975349
C  2.165421 -1.267240 -0.802843
D  0.090699  1.321715  0.794129
E  0.771970  0.294217 -1.391519 

df1: 
           W         X         Y         Z
A -1.409126  0.870907  1.585812  0.929658
B -0.550876  0.942045 -0.975349 -1.054851
C  2.165421 -1.267240 -0.802843 -0.253479
D  0.090699  1.321715  0.794129  1.359712 

df2: 
           W         X         Y
A -1.409126  0.870907  1.585812
B -0.550876  0.942045 -0.975349
C  2.165421 -1.267240 -0.802843
D  0.090699  1.321715  0.794129
E  0.771970  0.294217 -1.391519 

df: 
           W         X         Y         Z
A -1.409126  0.870907  1.585812  0.929658
B -0.550876  0.942045 -0.975349 -1.054851
C  2.165421 -1.267240 -0.802843 -0.253479
D  0.090699  1.321715  0.794129  1.359712
E  0.771970  0.294217 -1.391519  0.147486
Updated df: 
           W         X         Y
A -1.409126  0.870907  1.585812
B -0.550876  0.942045 -0.975349
C  2.165421 -1.267240 -0.802843
D  0.090699  1.321715  0.794129
E  0.771970  0.294217 -1.391519
Out[123]:
W X Y Z1
A -1.409126 0.870907 1.585812 0.176687
B -0.550876 0.942045 -0.975349 -1.526225
C 2.165421 -1.267240 -0.802843 1.362578
D 0.090699 1.321715 0.794129 0.884827
E 0.771970 0.294217 -1.391519 -0.619549

2.2.2) Indexing and Slicing using loc and iloc:

In [124]:
# Accessing Rows using loc
print(df.loc['A'], '\n')          # Single Row
print(df.loc[['A', 'C']], '\n')   # Two different Rows
print(df.loc['A':'C'], '\n')      # Slicing of Rows

# Accessing Rows using iloc
print(df.iloc[0], '\n')           # Single Row
print(df.iloc[[0, 2]], '\n')      # Two different Rows
print(df.iloc[0:2], '\n')         # Slicing of Rows

'''Interesting fact to note is that .loc[m:n] will go from m till n, including n.
   However, .iloc[i:j] will go from row / column no. i till no. j-1, excluding j.'''

# A subset of DataFrame can also be obtained
print(df.loc[['A', 'C'], ['W', 'Y']], '\n')
print(df.iloc[0:3,0:2])
W    -1.409126
X     0.870907
Y     1.585812
Z1    0.176687
Name: A, dtype: float64 

          W         X         Y        Z1
A -1.409126  0.870907  1.585812  0.176687
C  2.165421 -1.267240 -0.802843  1.362578 

          W         X         Y        Z1
A -1.409126  0.870907  1.585812  0.176687
B -0.550876  0.942045 -0.975349 -1.526225
C  2.165421 -1.267240 -0.802843  1.362578 

W    -1.409126
X     0.870907
Y     1.585812
Z1    0.176687
Name: A, dtype: float64 

          W         X         Y        Z1
A -1.409126  0.870907  1.585812  0.176687
C  2.165421 -1.267240 -0.802843  1.362578 

          W         X         Y        Z1
A -1.409126  0.870907  1.585812  0.176687
B -0.550876  0.942045 -0.975349 -1.526225 

          W         Y
A -1.409126  1.585812
C  2.165421 -0.802843 

          W         X
A -1.409126  0.870907
B -0.550876  0.942045
C  2.165421 -1.267240

2.2.3) Conditional Selection:

In [125]:
# Booleans
print(df>0, '\n')
print(df[df>0], '\n')

# Booleans on a particular column
print(df['W'] < 0, '\n')
print(df[df['W'] < 0], '\n')

df_sub = df[df['W']>0]
print(df_sub, '\n')

# We can also use multiple conditionals
print(df[(df['W']>0) & (df['Y'] < 0)], '\n')    # AND
print(df[(df['W']>0) | (df['Y'] < 0)], '\n')    # OR
       W      X      Y     Z1
A  False   True   True   True
B  False   True  False  False
C   True  False  False   True
D   True   True   True   True
E   True   True  False  False 

          W         X         Y        Z1
A       NaN  0.870907  1.585812  0.176687
B       NaN  0.942045       NaN       NaN
C  2.165421       NaN       NaN  1.362578
D  0.090699  1.321715  0.794129  0.884827
E  0.771970  0.294217       NaN       NaN 

A     True
B     True
C    False
D    False
E    False
Name: W, dtype: bool 

          W         X         Y        Z1
A -1.409126  0.870907  1.585812  0.176687
B -0.550876  0.942045 -0.975349 -1.526225 

          W         X         Y        Z1
C  2.165421 -1.267240 -0.802843  1.362578
D  0.090699  1.321715  0.794129  0.884827
E  0.771970  0.294217 -1.391519 -0.619549 

          W         X         Y        Z1
C  2.165421 -1.267240 -0.802843  1.362578
E  0.771970  0.294217 -1.391519 -0.619549 

          W         X         Y        Z1
B -0.550876  0.942045 -0.975349 -1.526225
C  2.165421 -1.267240 -0.802843  1.362578
D  0.090699  1.321715  0.794129  0.884827
E  0.771970  0.294217 -1.391519 -0.619549 

2.2.4) Resetting and Chaging the index:

In [126]:
# Reset the Index: To the default index 0, 1, 2,...
print('Current DataFrame: \n',df)
print('\n',df.reset_index())
print('\n',df)
df.reset_index(inplace=True)
print('\n',df)

# Set a particular column as index
labels = [1, 2, 3, 4, 5]
df.set_index('index', inplace=True)
print('\n',df)
Current DataFrame: 
           W         X         Y        Z1
A -1.409126  0.870907  1.585812  0.176687
B -0.550876  0.942045 -0.975349 -1.526225
C  2.165421 -1.267240 -0.802843  1.362578
D  0.090699  1.321715  0.794129  0.884827
E  0.771970  0.294217 -1.391519 -0.619549

   index         W         X         Y        Z1
0     A -1.409126  0.870907  1.585812  0.176687
1     B -0.550876  0.942045 -0.975349 -1.526225
2     C  2.165421 -1.267240 -0.802843  1.362578
3     D  0.090699  1.321715  0.794129  0.884827
4     E  0.771970  0.294217 -1.391519 -0.619549

           W         X         Y        Z1
A -1.409126  0.870907  1.585812  0.176687
B -0.550876  0.942045 -0.975349 -1.526225
C  2.165421 -1.267240 -0.802843  1.362578
D  0.090699  1.321715  0.794129  0.884827
E  0.771970  0.294217 -1.391519 -0.619549

   index         W         X         Y        Z1
0     A -1.409126  0.870907  1.585812  0.176687
1     B -0.550876  0.942045 -0.975349 -1.526225
2     C  2.165421 -1.267240 -0.802843  1.362578
3     D  0.090699  1.321715  0.794129  0.884827
4     E  0.771970  0.294217 -1.391519 -0.619549

               W         X         Y        Z1
index                                        
A     -1.409126  0.870907  1.585812  0.176687
B     -0.550876  0.942045 -0.975349 -1.526225
C      2.165421 -1.267240 -0.802843  1.362578
D      0.090699  1.321715  0.794129  0.884827
E      0.771970  0.294217 -1.391519 -0.619549

2.2.5) Multi-index and Index Hierarchy:

In [127]:
# Index Levels
outside = ['G1','G1','G1','G2','G2','G2']
inside = [1,2,3,1,2,3]
hier_index = list(zip(outside,inside))

# Forming the MultiIndex first
hier_index = pd.MultiIndex.from_tuples(hier_index)
In [131]:
hier_index
Out[131]:
MultiIndex([('G1', 1),
            ('G1', 2),
            ('G1', 3),
            ('G2', 1),
            ('G2', 2),
            ('G2', 3)],
           )
In [129]:
df = pd.DataFrame(np.random.randn(6,2),index=hier_index,columns=['A','B'])
df
Out[129]:
A B
G1 1 -0.729466 -0.720110
2 -0.285257 2.090539
3 1.004056 1.031916
G2 1 -1.081118 -0.000355
2 -0.219882 0.292795
3 0.834355 -0.140432
For index hierarchy we use df.loc[], if this was on the columns axis, you would just use normal bracket notation df[]. Calling one level of the index returns the sub-dataframe:
In [260]:
df.loc['G1']
Out[260]:
A B
1 0.153661 0.167638
2 -0.765930 0.962299
3 0.902826 -0.537909
In [140]:
df.loc['G1']['A']
Out[140]:
Num
1   -0.729466
2   -0.285257
3    1.004056
Name: A, dtype: float64
In [136]:
df.loc['G2'].loc[1]
Out[136]:
A   -1.081118
B   -0.000355
Name: 1, dtype: float64
In [265]:
df.index.names
Out[265]:
FrozenList([None, None])
In [132]:
df.index.names = ['Group','Num']
df
Out[132]:
A B
Group Num
G1 1 -0.729466 -0.720110
2 -0.285257 2.090539
3 1.004056 1.031916
G2 1 -1.081118 -0.000355
2 -0.219882 0.292795
3 0.834355 -0.140432
In [270]:
df.xs('G1')
Out[270]:
A B
Num
1 0.153661 0.167638
2 -0.765930 0.962299
3 0.902826 -0.537909
In [138]:
df.xs(['G1',1])
Out[138]:
A   -0.729466
B   -0.720110
Name: (G1, 1), dtype: float64
In [273]:
df.xs(1,level='Num')
Out[273]:
A B
Group
G1 0.153661 0.167638
G2 -1.549671 0.435253

2.2.6) Handling Missing Values:

We can adopt multiple strategies to find, drop or fill in Missing values: 1) Finding Missing values positions: df.isna() 2) Dropping Missing values across rows: df.dropna() 3) Dropping Missing values across cols: df.dropna(axis=1) 4) Dropping only if Missing values are above a threshold 'x': df.dropna(thresh=x) 5) Filling in all Missing Values with a constant 'x': df.fillna(x) 6) Filling in all Missing Values with a column parameter: df[col].fillna(df[col].mean())
In [165]:
df = pd.DataFrame({'A':[1,2,np.nan],
                  'B':[5,np.nan,np.nan],
                  'C':[1,2,3]})

df
Out[165]:
A B C
0 1.0 5.0 1
1 2.0 NaN 2
2 NaN NaN 3
In [166]:
# Finding the Missing Values:
df.isna()
Out[166]:
A B C
0 False False False
1 False True False
2 True True False
In [167]:
# Dropping Missing values across rows:
df.dropna(inplace=False)
Out[167]:
A B C
0 1.0 5.0 1
In [168]:
# Dropping Missing values across columns:
df.dropna(axis = 1, inplace=False)
Out[168]:
C
0 1
1 2
2 3
In [169]:
# Dropping only if Missing values are above a threshold 'x': 
df.dropna(thresh=2)
Out[169]:
A B C
0 1.0 5.0 1
1 2.0 NaN 2
In [170]:
# Filling in all Missing Values with a column parameter:
df['B'].fillna(df['B'].mean(), inplace=True)
df
Out[170]:
A B C
0 1.0 5.0 1
1 2.0 5.0 2
2 NaN 5.0 3

2.2.7) Groupby Method:

The groupby method allows you to group rows of data together and call aggregate functions such as mean, sum, median etc.
In [177]:
# Create dataframe
data = {'Company':['GOOGLE','GOOGLE','AMAZON','AMAZON','FB','FB'],
       'Person':['Sam','Charlie','Amy','Vanessa','Carl','Sarah'],
       'Sales':[200,120,340,124,243,350]}

dfg = pd.DataFrame(data)
dfg
Out[177]:
Company Person Sales
0 GOOGLE Sam 200
1 GOOGLE Charlie 120
2 AMAZON Amy 340
3 AMAZON Vanessa 124
4 FB Carl 243
5 FB Sarah 350
In [180]:
# Forming a grouby based off a particular column:
df_by_company = dfg.groupby('Company')
In [181]:
# Calling the aggregate functions:
df_by_company.mean()
Out[181]:
Sales
Company
AMAZON 232.0
FB 296.5
GOOGLE 160.0
In [182]:
df_by_company.sum()
Out[182]:
Sales
Company
AMAZON 464
FB 593
GOOGLE 320
In [191]:
df_by_company.describe().transpose()
Out[191]:
Company AMAZON FB GOOGLE
Sales count 2.000000 2.000000 2.000000
mean 232.000000 296.500000 160.000000
std 152.735065 75.660426 56.568542
min 124.000000 243.000000 120.000000
25% 178.000000 269.750000 140.000000
50% 232.000000 296.500000 160.000000
75% 286.000000 323.250000 180.000000
max 340.000000 350.000000 200.000000
In [198]:
df_by_company.describe().transpose()['GOOGLE']
Out[198]:
Sales  count      2.000000
       mean     160.000000
       std       56.568542
       min      120.000000
       25%      140.000000
       50%      160.000000
       75%      180.000000
       max      200.000000
Name: GOOGLE, dtype: float64

2.2.8) Merging, Joining, and Concatenating:

In [199]:
# Creating DataFrames:
df1 = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],
                        'B': ['B0', 'B1', 'B2', 'B3'],
                        'C': ['C0', 'C1', 'C2', 'C3'],
                        'D': ['D0', 'D1', 'D2', 'D3']},
                        index=[0, 1, 2, 3])

df2 = pd.DataFrame({'A': ['A4', 'A5', 'A6', 'A7'],
                        'B': ['B4', 'B5', 'B6', 'B7'],
                        'C': ['C4', 'C5', 'C6', 'C7'],
                        'D': ['D4', 'D5', 'D6', 'D7']},
                         index=[4, 5, 6, 7]) 

df3 = pd.DataFrame({'A': ['A8', 'A9', 'A10', 'A11'],
                        'B': ['B8', 'B9', 'B10', 'B11'],
                        'C': ['C8', 'C9', 'C10', 'C11'],
                        'D': ['D8', 'D9', 'D10', 'D11']},
                        index=[8, 9, 10, 11])
1) Concatenating DataFrames: Concatenation basically glues together DataFrames vertically or horizontally, one by one. Keep in mind that dimensions should match along the axis you are concatenating on. You can use pd.concat and pass in a list of DataFrames to concatenate together:
In [200]:
# Vertical Concatenation
pd.concat([df1, df2, df3])
Out[200]:
A B C D
0 A0 B0 C0 D0
1 A1 B1 C1 D1
2 A2 B2 C2 D2
3 A3 B3 C3 D3
4 A4 B4 C4 D4
5 A5 B5 C5 D5
6 A6 B6 C6 D6
7 A7 B7 C7 D7
8 A8 B8 C8 D8
9 A9 B9 C9 D9
10 A10 B10 C10 D10
11 A11 B11 C11 D11
In [201]:
# Horizontal Concatenation
pd.concat([df1, df2, df3], axis=1)
Out[201]:
A B C D A B C D A B C D
0 A0 B0 C0 D0 NaN NaN NaN NaN NaN NaN NaN NaN
1 A1 B1 C1 D1 NaN NaN NaN NaN NaN NaN NaN NaN
2 A2 B2 C2 D2 NaN NaN NaN NaN NaN NaN NaN NaN
3 A3 B3 C3 D3 NaN NaN NaN NaN NaN NaN NaN NaN
4 NaN NaN NaN NaN A4 B4 C4 D4 NaN NaN NaN NaN
5 NaN NaN NaN NaN A5 B5 C5 D5 NaN NaN NaN NaN
6 NaN NaN NaN NaN A6 B6 C6 D6 NaN NaN NaN NaN
7 NaN NaN NaN NaN A7 B7 C7 D7 NaN NaN NaN NaN
8 NaN NaN NaN NaN NaN NaN NaN NaN A8 B8 C8 D8
9 NaN NaN NaN NaN NaN NaN NaN NaN A9 B9 C9 D9
10 NaN NaN NaN NaN NaN NaN NaN NaN A10 B10 C10 D10
11 NaN NaN NaN NaN NaN NaN NaN NaN A11 B11 C11 D11
2) Merging DataFrames: The merge function allows you to merge DataFrames together using a similar logic as merging SQL Tables together.
In [ ]:
left = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
                     'A': ['A0', 'A1', 'A2', 'A3'],
                     'B': ['B0', 'B1', 'B2', 'B3']})
   
right = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
                          'C': ['C0', 'C1', 'C2', 'C3'],
                          'D': ['D0', 'D1', 'D2', 'D3']})  
In [35]:
pd.merge(left,right,how='inner',on='key')
Out[35]:
A B key C D
0 A0 B0 K0 C0 D0
1 A1 B1 K1 C1 D1
2 A2 B2 K2 C2 D2
3 A3 B3 K3 C3 D3

Or to show a more complicated example:

In [37]:
left = pd.DataFrame({'key1': ['K0', 'K0', 'K1', 'K2'],
                     'key2': ['K0', 'K1', 'K0', 'K1'],
                        'A': ['A0', 'A1', 'A2', 'A3'],
                        'B': ['B0', 'B1', 'B2', 'B3']})
    
right = pd.DataFrame({'key1': ['K0', 'K1', 'K1', 'K2'],
                               'key2': ['K0', 'K0', 'K0', 'K0'],
                                  'C': ['C0', 'C1', 'C2', 'C3'],
                                  'D': ['D0', 'D1', 'D2', 'D3']})
In [39]:
pd.merge(left, right, on=['key1', 'key2'])
Out[39]:
A B key1 key2 C D
0 A0 B0 K0 K0 C0 D0
1 A2 B2 K1 K0 C1 D1
2 A2 B2 K1 K0 C2 D2
In [40]:
pd.merge(left, right, how='outer', on=['key1', 'key2'])
Out[40]:
A B key1 key2 C D
0 A0 B0 K0 K0 C0 D0
1 A1 B1 K0 K1 NaN NaN
2 A2 B2 K1 K0 C1 D1
3 A2 B2 K1 K0 C2 D2
4 A3 B3 K2 K1 NaN NaN
5 NaN NaN K2 K0 C3 D3
In [41]:
pd.merge(left, right, how='right', on=['key1', 'key2'])
Out[41]:
A B key1 key2 C D
0 A0 B0 K0 K0 C0 D0
1 A2 B2 K1 K0 C1 D1
2 A2 B2 K1 K0 C2 D2
3 NaN NaN K2 K0 C3 D3
In [42]:
pd.merge(left, right, how='left', on=['key1', 'key2'])
Out[42]:
A B key1 key2 C D
0 A0 B0 K0 K0 C0 D0
1 A1 B1 K0 K1 NaN NaN
2 A2 B2 K1 K0 C1 D1
3 A2 B2 K1 K0 C2 D2
4 A3 B3 K2 K1 NaN NaN
2) Joining DataFrames: Joining is a convenient method for combining the columns of two potentially differently-indexed DataFrames into a single result DataFrame.
In [46]:
left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],
                     'B': ['B0', 'B1', 'B2']},
                      index=['K0', 'K1', 'K2']) 

right = pd.DataFrame({'C': ['C0', 'C2', 'C3'],
                    'D': ['D0', 'D2', 'D3']},
                      index=['K0', 'K2', 'K3'])
In [47]:
left.join(right)
Out[47]:
A B C D
K0 A0 B0 C0 D0
K1 A1 B1 NaN NaN
K2 A2 B2 C2 D2
In [48]:
left.join(right, how='outer')
Out[48]:
A B C D
K0 A0 B0 C0 D0
K1 A1 B1 NaN NaN
K2 A2 B2 C2 D2
K3 NaN NaN C3 D3
In [ ]:
left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],
                     'B': ['B0', 'B1', 'B2']},
                      index=['K0', 'K1', 'K2']) 

right = pd.DataFrame({'C': ['C0', 'C2', 'C3'],
                    'D': ['D0', 'D2', 'D3']},
                      index=['K0', 'K2', 'K3'])

left.join(right)

left.join(right, how='outer')

2.3) Pandas Operations

In [230]:
# Creating a DataFrame:
df = pd.DataFrame({'col1':[1,2,3,4],'col2':[444,555,666,444],'col3':['abc','def','ghi','xyz']})
df.head()
Out[230]:
col1 col2 col3
0 1 444 abc
1 2 555 def
2 3 666 ghi
3 4 444 xyz
In [255]:
# Basic information about the DataFrame:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   a       4 non-null      int64
 1   b       4 non-null      int64
 2   c       4 non-null      int64
 3   d       4 non-null      int64
dtypes: int64(4)
memory usage: 256.0 bytes
In [236]:
# The list of all columns labels
df.columns
Out[236]:
Index(['col1', 'col2', 'col3'], dtype='object')
In [237]:
# The list of all row labels
df.index
Out[237]:
RangeIndex(start=0, stop=4, step=1)

2.3.1) Unique Values:

In [205]:
# Finding unique values in a column of a DataFrame:
df['col1'].unique()
Out[205]:
array([1, 2, 3, 4], dtype=int64)
In [207]:
# Finding the count of unique values in a column of a DataFrame:
df['col2'].nunique()
Out[207]:
3
In [209]:
# Finding the frequency of values in a column of a DataFrame:
df['col2'].value_counts()
Out[209]:
444    2
555    1
666    1
Name: col2, dtype: int64
In [240]:
# Finding the Null values in the DataFrame as a boolean
df.isnull()
Out[240]:
col1 col2 col3
0 False False False
1 False False False
2 False False False
3 False False False

2.3.2) Conditionals:

In [211]:
# Finding the values from a column satisfying a given condition:
df[df['col1']>1]
Out[211]:
col1 col2 col3
1 2 555 def
2 3 666 ghi
3 4 444 xyz
In [216]:
# Select from DataFrame using criteria from multiple columns:
df[(df['col1']>1) & (df['col2']>450)]
Out[216]:
col1 col2 col3
1 2 555 def
2 3 666 ghi

2.3.3) Applying functions:

In [219]:
# Applying a function on each value in a column:
def modulus(x):
    return x%10

df['col2'].apply(modulus)
Out[219]:
0    4
1    5
2    6
3    4
Name: col2, dtype: int64
In [220]:
df['col3'].apply(len)
Out[220]:
0    3
1    3
2    3
3    3
Name: col3, dtype: int64

2.3.4) Deleting & Adding a column:

In [233]:
# Drop a column
df.drop('col1', axis=1, inplace=False)
Out[233]:
col2 col3
0 444 abc
1 555 def
2 666 ghi
3 444 xyz
In [234]:
# Drop a row
df.drop(1, inplace=False)
Out[234]:
col1 col2 col3
0 1 444 abc
2 3 666 ghi
3 4 444 xyz
In [227]:
# Permanently delete a column
del df['col2']
df
Out[227]:
col1 col3
0 1 abc
1 2 def
2 3 ghi
3 4 xyz
In [231]:
# Permanently add a column
df['col2'] = [444, 555, 666, 444]
df
Out[231]:
col1 col2 col3
0 1 444 abc
1 2 555 def
2 3 666 ghi
3 4 444 xyz

2.3.5) Sorting the values in a column:

In [239]:
df.sort_values(by='col2')
Out[239]:
col1 col2 col3
0 1 444 abc
3 4 444 xyz
1 2 555 def
2 3 666 ghi

2.3.6) Pivot in a DataFrame:

In [242]:
data = {'A':['foo','foo','foo','bar','bar','bar'],
     'B':['one','one','two','two','one','one'],
       'C':['x','y','x','y','x','y'],
       'D':[1,3,2,5,4,1]}

df = pd.DataFrame(data)
df
Out[242]:
A B C D
0 foo one x 1
1 foo one y 3
2 foo two x 2
3 bar two y 5
4 bar one x 4
5 bar one y 1
In [91]:
df.pivot_table(values='D',index=['A', 'B'],columns=['C'])
Out[91]:
C x y
A B
bar one 4.0 1.0
two NaN 5.0
foo one 1.0 3.0
two 2.0 NaN

2.4) Data Input and Output:

We can Input and Output to CSV or Excel files using Pandas.

2.4.1) Input and Output a CSV file:

In [248]:
df1 = pd.read_csv('example')
df1
Out[248]:
a b c d
0 0 1 2 3
1 4 5 6 7
2 8 9 10 11
3 12 13 14 15
In [251]:
df1.to_csv('example',index=False)

2.4.2) Input and Output an Excel file:

In [253]:
df2 = pd.read_excel('Excel_Sample.xlsx')
df2
Out[253]:
Unnamed: 0 a b c d
0 0 0 1 2 3
1 1 4 5 6 7
2 2 8 9 10 11
3 3 12 13 14 15
In [254]:
df2.to_excel('Excel_Sample.xlsx')

**3) Matplotlib:-**

Matplotlib is the "grandfather" library of data visualization with Python. It was created by John Hunter. He created it to try to replicate MatLab's (another programming language) plotting capabilities in Python. So if you happen to be familiar with matlab, matplotlib will feel natural to you. It is an excellent 2D and 3D graphics library for generating scientific figures. Some of the major Pros of Matplotlib are: Generally easy to get started for simple plots Support for custom labels and texts Great control of every element in a figure High-quality output in many formats Very customizable in general%matplotlib notebook provides an interactive environment.
In [4]:
import matplotlib.pyplot as plt
%matplotlib inline
# %matplotlib notebook

3.1) Basic Matplotlib Command:

In [9]:
x = np.linspace(0, 5, 11)
y = x ** 2

plt.plot(x, y, 'r--o') 
plt.xlabel('X Axis Title Here')
plt.ylabel('Y Axis Title Here')
plt.title('String Title Here');
In [10]:
# Multiple Subplots
# plt.subplot(nrows, ncols, plot_number)
plt.subplot(1,2,1)
plt.plot(x, y, 'r--') # More on color options later
plt.subplot(1,2,2)
plt.plot(y, x, 'g*-');

3.2) Matplotlib Object Oriented Method:

In [45]:
# Create Figure (empty canvas)
fig = plt.figure()

# Add set of axes to figure
axes = fig.add_axes([0.1, 0.1, 0.8, 0.8]) # left, bottom, width, height (range 0 to 1)

# Plot on that set of axes
axes.plot(x, y, 'b-*')
axes.set_xlabel('Set X Label') # Notice the use of set_ to begin methods
axes.set_ylabel('Set y Label')
axes.set_title('Set Title')
Out[45]:
Text(0.5, 1.0, 'Set Title')
In [53]:
# Creates blank canvas
fig = plt.figure()

axes1 = fig.add_axes([0.1, 0.1, 0.8, 0.8]) # main axes
axes2 = fig.add_axes([0.2, 0.5, 0.4, 0.3]) # inset axes

# Larger Figure Axes 1
axes1.plot(x, y, 'b-s')
axes1.set_xlabel('X_label_axes2')
axes1.set_ylabel('Y_label_axes2')
axes1.set_title('Axes 2 Title')

# Insert Figure Axes 2
axes2.plot(y, x, 'r-.')
axes2.set_xlabel('X_label_axes2')
axes2.set_ylabel('Y_label_axes2')
axes2.set_title('Axes 2 Title');

3.3) Matplotlib Subplots:

In [57]:
# We can create a tuple unpacking where the 'axes' is basically an array of all the axes
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(14,4))

# We can now call on each of the axes and plot
axes[0].plot(x, x**2, '--', 'r', label='x**2')
axes[1].plot(x, x**3, 'b-*', label='x**3')
axes[2].plot(x, x**2-5*x+6, 'g-s', label='x**2-5x+6')

# Set the Title and axes labels
axes[0].set_title('Linear')
axes[1].set_title('Cubic')
axes[2].set_title('Quadratic')

axes[0].set_xlabel('x')
axes[0].set_ylabel('y')
axes[1].set_xlabel('x')
axes[1].set_ylabel('y')
axes[2].set_xlabel('x')
axes[2].set_ylabel('y')

for ax in axes:
    ax.legend(loc=0)          # loc=0 finds the best location for the legend

plt.tight_layout()

3.4) Filling in the plots

In [16]:
linear_data = np.array([1,2,3,4,5,6,7,8])
exponential_data = linear_data**2

plt.figure()
# plot the linear data and the exponential data
plt.plot(linear_data, '-o', exponential_data, '-o')

# fill the area between the linear data and exponential data
plt.gca().fill_between(range(len(linear_data)), 
                       linear_data, exponential_data, 
                       facecolor='blue', 
                       alpha=0.25);

3.5) Plotting with Pandas:

In [22]:
# # Seaborn comes with built-in data sets!
df = sns.load_dataset('tips')
df.head()
Out[22]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
In [23]:
df.plot.box();
In [25]:
df.plot.hist();

3.6) Figure Size and DPI:

In [307]:
fig = plt.figure(figsize=(5, 4),dpi=80)

ax = fig.add_axes([0, 0, 1, 1])

ax.plot(x, x**2)
ax.set_title('Quadratic')
ax.set_xlabel('x')
ax.set_ylabel('y');

3.6) Custom Plot Range:

In [59]:
fig, axes = plt.subplots(1, 3, figsize=(12, 4))

axes[0].plot(x, x**2, x, x**3)
axes[0].set_title("default axes ranges")

axes[1].plot(x, x**2, x, x**3)
axes[1].axis('tight')
axes[1].set_title("tight axes")

axes[2].plot(x, x**2, x, x**3)
axes[2].set_ylim([0, 60])
axes[2].set_xlim([2, 5])
axes[2].set_title("custom axes range");

plt.tight_layout()

**4) Seaborn:-**

Seaborn is a Python data visualization library based on matplotlib. It provides a high-level interface for drawing attractive and informative statistical graphics.
In [5]:
import seaborn as sns
sns.set_style('darkgrid')
In [12]:
# # Seaborn comes with built-in data sets!
tips = sns.load_dataset('tips')
tips.head()
Out[12]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4

4.1) Distribution Plots:

4.1.1) Distplot:

The distplot shows the distribution of a univariate set (only one variable) of observations.
In [13]:
%%timeit -n 100   # IPython Magic function
sns.distplot(tips['total_bill'], kde=False);
9.19 ms ± 1.01 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)
In [323]:
# We can also vary the bins size to get a more detailed distribution
sns.distplot(tips['total_bill'], kde=False, bins=40);

4.1.2) Jointplot:

The jointplot allows to basically match up two distplots for bivariate data. 1) scatter 2) reg 3) resid 4) kde 5) hex
In [14]:
# The default scatter kind plot
sns.jointplot(x='total_bill',y='tip',data=tips);
In [328]:
# The hexagonal plot
sns.jointplot(x='total_bill',y='tip',data=tips, kind='hex');
In [329]:
# The kde kind plot
sns.jointplot(x='total_bill',y='tip',data=tips, kind='kde');
In [330]:
# The regression plot
sns.jointplot(x='total_bill',y='tip',data=tips, kind='reg');
In [331]:
# The resid kind plot
sns.jointplot(x='total_bill',y='tip',data=tips, kind='resid');

4.1.3) Pairplot:

The pairplot will plot pairwise relationships across an entire dataframe (for the numerical columns) and supports a color hue argument (for categorical columns).
In [332]:
sns.pairplot(tips);
Out[332]:
<seaborn.axisgrid.PairGrid at 0x26c27cc54f0>
In [333]:
sns.pairplot(tips, hue='sex');

4.1.4) Rugplot:

In [338]:
sns.rugplot(tips['total_bill']);

4.2) Categorical Plots:

4.2.1) Barplot and Countplot:

These plots allow to get aggregate data off a categorical feature in the data. barplot is a general plot that allows to aggregate the categorical data based off some function, by default the mean. We can change the estimator object to another own function, that converts a vector to a scalar e.g. std dev, median. The Countplot is essentially the same as the barplot except the estimator is explicitly counting the number of occurrences, which is why we only pass in the x value, same as pd.value_counts() in a way.
In [339]:
# Barplot with a Categorical variable on the x-axis and a numerical variable on the y-axis compaing means
sns.barplot(x='sex', y='total_bill', data=tips);
In [342]:
# Barplot with a Categorical variable on the x-axis and a numerical variable on the y-axis comparing std dev
sns.barplot(x='sex', y='total_bill', data=tips, estimator=np.std);
In [343]:
# Barplot with a Categorical variable on the x-axis and a numerical variable on the y-axis comparing median
sns.barplot(x='sex', y='total_bill', data=tips, estimator=np.median);
In [345]:
# Counplot for categorical variables
sns.countplot(tips['sex']);
In [347]:
# Counplot for categorical variables with hue
sns.countplot(tips['size'], hue=tips['sex']);

4.2.2) Boxplot and Violinplot:

The boxplots and violinplots are used to shown the distribution of categorical data. A box plot (or box-and-whisker plot) shows the distribution of quantitative data in a way that facilitates comparisons between variables or across levels of a categorical variable. The box shows the quartiles of the dataset while the whiskers extend to show the rest of the distribution, except for points that are determined to be “outliers” using a method that is a function of the inter-quartile range. A violin plot plays a similar role as a box and whisker plot. It shows the distribution of quantitative data across several levels of one (or more) categorical variables such that those distributions can be compared. Unlike a box plot, in which all of the plot components correspond to actual datapoints, the violin plot features a kernel density estimation of the underlying distribution.
In [349]:
# A boxplot with categorical variable on the x-axis and numerical varaiable on the y-axis
sns.boxplot(x='day', y='total_bill', data=tips);
In [350]:
# A boxplot with categorical variable on the x-axis and numerical varaiable on the y-axis with hue on sex
sns.boxplot(x='day', y='total_bill', data=tips, hue='sex');
In [352]:
# A boxplot with categorical variable on the x-axis and numerical varaiable on the y-axis with hue on smoker
sns.boxplot(x='day', y='total_bill', data=tips, hue='smoker');
In [353]:
# A violinplot with exact same arguments as the boxplot
sns.violinplot(x='day', y='total_bill', data=tips, hue='smoker');
In [356]:
# We can also merge these two hues with split
sns.violinplot(x='day', y='total_bill', data=tips, hue='smoker', split=True);

4.2.3) Factor Plots:

The factorplot is the most general form of a categorical plot. It can take in a kind parameter to adjust the plot type.
In [358]:
sns.factorplot(x='day', y='total_bill', data=tips, kind='bar');

4.3) Matrix Plots:

The Matrix plots allow to plot data as color-encoded matrices and can also be used to indicate clusters within the data.
In [360]:
flights = sns.load_dataset('flights')
In [361]:
flights.head()
Out[361]:
year month passengers
0 1949 January 112
1 1949 February 118
2 1949 March 132
3 1949 April 129
4 1949 May 121

4.3.1) Heatmaps:

In order for a heatmap to work properly, the data should already be in a matrix form, the sns.heatmap function basically just colors it in. In order to convert a DataFrame into its Matrix form, we use the df.corr() function.
In [388]:
sns.heatmap(tips.corr(),  annot=True, cmap='magma', lw=1, linecolor='black');
In [380]:
sns.heatmap(flights.corr(),  annot=True, cmap='RdBu_r');

4.4) Grid Plots:

Grids are general types of plots that allow to map plot types to rows and columns of a grid, this helps to create similar plots separated by features.
In [389]:
iris = sns.load_dataset('iris')
iris.head()
Out[389]:
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa

4.4.1) PairGrid:

Pairgrid is a subplot grid for plotting pairwise relationships in a dataset.
In [396]:
# First create a PairGrid
g = sns.PairGrid(iris)

# Now plot or map on this PairGrid at upper,lower, and diagonal locations
g.map_diag(sns.distplot);     # On the diagonal
g.map_upper(plt.scatter);     # On the upper triangle
g.map_lower(sns.kdeplot);     # On the lower triangle

4.4.2) FacetGrid:

FacetGrid is the general way to create grids of plots based off of a feature.
In [412]:
# Create an empty FacetGrid
fg = sns.FacetGrid(tips, col='time', row='sex')

# Now map or plot to this grid with an uniariate plot
fg.map(sns.distplot, 'total_bill');
In [415]:
# Create an empty FacetGrid
fg1 = sns.FacetGrid(tips, col='time', row='sex')

# Now map or plot to this grid with a bivariate plot
fg1.map(sns.scatterplot, 'total_bill', 'tip');

4.5) Regression Plot:

The lmplot() allows to display linear models, but it also conveniently allows to split up those plots based off of features, as well as coloring the hue based off of features.
In [416]:
sns.lmplot(x='total_bill', y='tip', data=tips);
In [418]:
# We can include a hue as well
sns.lmplot(x='total_bill', y='tip', data=tips, hue='sex', markers=['o', 'v']);

Using a Grid

We can add more variable separation through columns and rows with the use of a grid. Just indicate this with the col or row arguments:
In [419]:
sns.lmplot(x='total_bill',y='tip',data=tips,col='sex');
In [454]:
# We just need to include the column and rows (no need for any grid)
sns.lmplot(x="total_bill", y="tip", row="sex", col="time",data=tips);
In [524]:
# We can control the height and the width of the plots using aspect and height arguments
sns.lmplot(x='total_bill',y='tip',data=tips,col='day',hue='sex',palette='seismic', aspect=0.8, height=5);

4.6) Seaborn Style and Color:

4.6.1) Plot Style and Spines:

The Plot style can be set suing sns.set_style() with four arguments as- 1) whitegrid 2) darkgrid 3) white 4) dark 5) ticks The Spines around the plots can be removed using sns.despine()
In [473]:
sns.set_style('white')

sns.countplot(tips['sex']);
sns.despine()

4.6.2) Size and Aspect

We can use matplotlib's **plt.figure(figsize=(width,height) ** to change the size of most seaborn plots. We can control the size and aspect ratio of most seaborn grid plots by passing in parameters: size, and aspect. For example:
In [509]:
# Non Grid Plot
plt.figure(figsize=(6,4))
sns.countplot(x='sex',data=tips);
In [508]:
# Grid Type Plot
sns.lmplot(x='total_bill',y='tip',height=5,aspect=0.8,data=tips);

4.6.3) Scale and Context

The set_context() allows to override default parameters. The context and font_scale can be changed. The context can be changed to: 1) paper 2) notebook 3) poster 4) talk

Palettes

In [16]:
print('We can get more colormaps and pallettes from:')
display.Image("./Matplotlib Colormaps.png", width=500, height=50)
We can get more colormaps and pallettes from:
Out[16]:
In [518]:
sns.set_context('notebook', font_scale=1);
sns.countplot(x='sex',data=tips,palette='seismic');
In general, for Seaborn plotting, start off with- 1) sns.set_style('white'); 2) sns.set_context('notebook', font_scale=1); 3) sns.set_palette("GnBu_d") or 'seismic' or 'magma'
In [6]:
sns.set_style('whitegrid');
sns.set_context('notebook', font_scale=1);
sns.set_palette("RdBu_r")

**5) Machine Learning with Python:-**

The main libraries to be used for Machine Learning in Python is the 'Scikit-learn'. The Algorithmic Cheat sheet is as displayed below: download%20%285%29.png

**6) Linear Regression:-**

The Linear Regression is the simplet regression and prediction algorithm for continuous outputs.
In [7]:
# Importing the required libraries:
from sklearn.model_selection import train_test_split    # For Train-Test split

from sklearn.linear_model import LinearRegression       # The Linear Regression Model

from sklearn import metrics                             # For Regression Evaluation Metrics
In [17]:
# Importing the dataset for predicting the Price of a house:
df = pd.read_csv('USA_Housing-Copy1.csv')
In [19]:
df.head()
Out[19]:
Avg. Area Income Avg. Area House Age Avg. Area Number of Rooms Avg. Area Number of Bedrooms Area Population Price Address
0 79545.458574 5.682861 7.009188 4.09 23086.800503 1.059034e+06 208 Michael Ferry Apt. 674\nLaurabury, NE 3701...
1 79248.642455 6.002900 6.730821 3.09 40173.072174 1.505891e+06 188 Johnson Views Suite 079\nLake Kathleen, CA...
2 61287.067179 5.865890 8.512727 5.13 36882.159400 1.058988e+06 9127 Elizabeth Stravenue\nDanieltown, WI 06482...
3 63345.240046 7.188236 5.586729 3.26 34310.242831 1.260617e+06 USS Barnett\nFPO AP 44820
4 59982.197226 5.040555 7.839388 4.23 26354.109472 6.309435e+05 USNS Raymond\nFPO AE 09386
In [20]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 7 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Avg. Area Income              5000 non-null   float64
 1   Avg. Area House Age           5000 non-null   float64
 2   Avg. Area Number of Rooms     5000 non-null   float64
 3   Avg. Area Number of Bedrooms  5000 non-null   float64
 4   Area Population               5000 non-null   float64
 5   Price                         5000 non-null   float64
 6   Address                       5000 non-null   object 
dtypes: float64(6), object(1)
memory usage: 273.6+ KB
In [21]:
df.describe().transpose()
Out[21]:
count mean std min 25% 50% 75% max
Avg. Area Income 5000.0 6.858311e+04 10657.991214 17796.631190 61480.562388 6.880429e+04 7.578334e+04 1.077017e+05
Avg. Area House Age 5000.0 5.977222e+00 0.991456 2.644304 5.322283 5.970429e+00 6.650808e+00 9.519088e+00
Avg. Area Number of Rooms 5000.0 6.987792e+00 1.005833 3.236194 6.299250 7.002902e+00 7.665871e+00 1.075959e+01
Avg. Area Number of Bedrooms 5000.0 3.981330e+00 1.234137 2.000000 3.140000 4.050000e+00 4.490000e+00 6.500000e+00
Area Population 5000.0 3.616352e+04 9925.650114 172.610686 29403.928702 3.619941e+04 4.286129e+04 6.962171e+04
Price 5000.0 1.232073e+06 353117.626581 15938.657923 997577.135049 1.232669e+06 1.471210e+06 2.469066e+06

6.1) EDA:

In [34]:
sns.pairplot(df);
In [42]:
fig, axes = plt.subplots(1, 5, figsize=(16, 4))

sns.distplot(df['Avg. Area Income'], ax=axes[0]);
sns.distplot(df['Avg. Area House Age'], ax=axes[1]);
sns.distplot(df['Avg. Area Number of Rooms'], ax=axes[2]);
sns.distplot(df['Avg. Area Number of Bedrooms'], ax=axes[3]);
sns.distplot(df['Area Population'], ax=axes[4]);

plt.tight_layout()
In [50]:
fig, axes = plt.subplots(1, 5, figsize=(14, 4))

sns.boxplot(df['Avg. Area Income'], ax=axes[0], orient='v');
sns.boxplot(df['Avg. Area House Age'], ax=axes[1], orient='v');
sns.boxplot(df['Avg. Area Number of Rooms'], ax=axes[2], orient='v');
sns.boxplot(df['Avg. Area Number of Bedrooms'], ax=axes[3], orient='v');
sns.boxplot(df['Area Population'], ax=axes[4], orient='v');

plt.tight_layout()
In [54]:
sns.heatmap(df.corr(), cmap='magma', annot=True);

6.2) Training our Linear Regression Model

Let's now begin to train out regression model! We will need to first split up our data into an X array that contains the features to train on, and a y array with the target variable, in this case the Price column. We will toss out the Address column because it only has text info that the linear regression model can't use.
In [84]:
# Columns having Numerical data are
numeric_features = df.select_dtypes(include=[np.number])
print(list(numeric_features.columns))
['Avg. Area Income', 'Avg. Area House Age', 'Avg. Area Number of Rooms', 'Avg. Area Number of Bedrooms', 'Area Population', 'Price']
In [60]:
# Creating our X and y arrays yb separating the target labels column:
X = df[['Avg. Area Income', 'Avg. Area House Age', 'Avg. Area Number of Rooms',
               'Avg. Area Number of Bedrooms', 'Area Population']]
y = df['Price']

6.2.1) Train Test split:

In [61]:
# The order of the split variables is important
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)

6.2.2) Creating and Training the Model

In [62]:
lm = LinearRegression()
lm.fit(X_train, y_train)                   # Fit the model on the train split
Out[62]:
LinearRegression()

6.2.3) Model Evaluation

Let's evaluate the modeel by checking out the intercept and coefficients. The linear model is given by- y = (intercept) + (coeff_1)*x1 + (coeff_2)*x2 + (coeff_3)*x3 + (coeff_4)*x4 ...
In [63]:
# The intercept of the linear model:
lm.intercept_
Out[63]:
-2641372.667301679
In [65]:
# For coefficients can be properly shown in a DataFrame:
# print(lm.coef_)
coeff_df = pd.DataFrame(lm.coef_,X.columns,columns=['Coefficient'])
coeff_df
Out[65]:
Coefficient
Avg. Area Income 21.617635
Avg. Area House Age 165221.119872
Avg. Area Number of Rooms 121405.376596
Avg. Area Number of Bedrooms 1318.718783
Area Population 15.225196

Interpreting the coefficients:

  • Holding all other features fixed, a 1 unit increase in Avg. Area Income is associated with an increase of \$21.52 .
  • Holding all other features fixed, a 1 unit increase in Avg. Area House Age is associated with an increase of \$164883.28 .
  • Holding all other features fixed, a 1 unit increase in Avg. Area Number of Rooms is associated with an increase of \$122368.67 .
  • Holding all other features fixed, a 1 unit increase in Avg. Area Number of Bedrooms is associated with an increase of \$2233.80 .
  • Holding all other features fixed, a 1 unit increase in Area Population is associated with an increase of \$15.15 .

Does this make sense? Probably not because I made up this data. If you want real data to repeat this sort of analysis, check out the boston dataset:

from sklearn.datasets import load_boston
boston = load_boston()
print(boston.DESCR)
boston_df = boston.data

6.2.4) Predictions from our Model

Let's grab predictions off our test set and see how well it did!
In [67]:
# Let's fetch the y_test lavels:
predictions = lm.predict(X_test)
predictions
[1258934.89505291  822694.63411034 1742214.39530127 ... 1452640.96473346
  645797.13026303 1364943.76221896]
In [78]:
sns.jointplot(y_test, predictions, kind='scatter');

Residual Histogram

In [71]:
# A close to normal distribution of Residuals indicates correctness of the model
sns.distplot((y_test-predictions), bins=50);

6.4) Regression Evaluation Metrics

Here are three common evaluation metrics for regression problems:

Mean Absolute Error (MAE) is the mean of the absolute value of the errors:

$$\frac 1n\sum_{i=1}^n|y_i-\hat{y}_i|$$

Mean Squared Error (MSE) is the mean of the squared errors:

$$\frac 1n\sum_{i=1}^n(y_i-\hat{y}_i)^2$$

Root Mean Squared Error (RMSE) is the square root of the mean of the squared errors:

$$\sqrt{\frac 1n\sum_{i=1}^n(y_i-\hat{y}_i)^2}$$

Comparing these metrics:

  • MAE is the easiest to understand, because it's the average error.
  • MSE is more popular than MAE, because MSE "punishes" larger errors, which tends to be useful in the real world.
  • RMSE is even more popular than MSE, because RMSE is interpretable in the "y" units.

All of these are loss functions, because we want to minimize them.

In [80]:
print('MAE:', metrics.mean_absolute_error(y_test, predictions))
print('MSE:', metrics.mean_squared_error(y_test, predictions))
print('RMSE:', np.sqrt(metrics.mean_squared_error(y_test, predictions)))
MAE: 81257.55795856068
MSE: 10169125565.897734
RMSE: 100842.08231635111
In [87]:
# The R^2 value can also be obtained as:
print('R^2 score: ', metrics.explained_variance_score(y_test, predictions))
R^2 score:  0.918582277096527

**7) Logistic Regression:-**

The Logistic Regression is the simplet regression and prediction algorithm for categorical outputs.
In [64]:
# Importing the required libraries:
from sklearn.model_selection import train_test_split    # For Train-Test split
from sklearn.linear_model import LogisticRegression     # The Linear Regression Model
from sklearn.metrics import classification_report       # For Classification Report
from sklearn.metrics import plot_confusion_matrix       # For Plotting Confusion Matrix
from sklearn.metrics import confusion_matrix            # For Confusion Matrix
In [241]:
# Importing the dataset for predicting the Price of a house:
df = pd.read_csv('titanic_train-Copy1.csv')
In [242]:
df.head()
Out[242]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S
In [243]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB
In [244]:
df.describe().transpose()
Out[244]:
count mean std min 25% 50% 75% max
PassengerId 891.0 446.000000 257.353842 1.00 223.5000 446.0000 668.5 891.0000
Survived 891.0 0.383838 0.486592 0.00 0.0000 0.0000 1.0 1.0000
Pclass 891.0 2.308642 0.836071 1.00 2.0000 3.0000 3.0 3.0000
Age 714.0 29.699118 14.526497 0.42 20.1250 28.0000 38.0 80.0000
SibSp 891.0 0.523008 1.102743 0.00 0.0000 0.0000 1.0 8.0000
Parch 891.0 0.381594 0.806057 0.00 0.0000 0.0000 0.0 6.0000
Fare 891.0 32.204208 49.693429 0.00 7.9104 14.4542 31.0 512.3292
In [245]:
df.nunique()
Out[245]:
PassengerId    891
Survived         2
Pclass           3
Name           891
Sex              2
Age             88
SibSp            7
Parch            7
Ticket         681
Fare           248
Cabin          147
Embarked         3
dtype: int64

7.1) Handling Missing Data

In [246]:
dfn = df.isnull().describe().transpose()
dfn[dfn['unique']==2]
Out[246]:
count unique top freq
Age 891 2 False 714
Cabin 891 2 True 687
Embarked 891 2 False 889
In [247]:
# We can also use heatmaps to identify missing data
sns.heatmap(df.isnull(),yticklabels=False,cbar=False,cmap='seismic');
We want to fill in missing age data instead of just dropping the missing age data rows. One way to do this is by filling in the mean age of all the passengers (imputation). However we can be smarter about this and check the average age by passenger class. For example:
In [248]:
plt.figure(figsize=(12, 7))
sns.boxplot(x='Pclass',y='Age',data=df,palette='winter');
We can see the wealthier passengers in the higher classes tend to be older, which makes sense. We'll use these average age values to impute based on Pclass for Age.
In [249]:
def impute_age(cols):
    Age = cols[0]
    Pclass = cols[1]
    
    if pd.isnull(Age):

        if Pclass == 1:
            return 37

        elif Pclass == 2:
            return 29

        else:
            return 24

    else:
        return Age
Now apply that function!
In [250]:
df['Age'] = df[['Age','Pclass']].apply(impute_age,axis=1)
Now let's check that heat map again!
In [251]:
sns.heatmap(df.isnull(),yticklabels=False,cbar=False,cmap='seismic');
Great! Let's go ahead and drop the Cabin column and the row in Embarked that is NaN.
In [252]:
df.drop('Cabin',axis=1,inplace=True)
In [253]:
df.head()
Out[253]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 S
In [254]:
df.dropna(inplace=True)
Now let's check that heat map again!
In [255]:
sns.heatmap(df.isnull(),yticklabels=False,cbar=False,cmap='seismic');

7.2) EDA:

In [256]:
# Columns having Numerical data are
numeric_features = df.select_dtypes(include=[np.number])
print(list(numeric_features.columns))
['PassengerId', 'Survived', 'Pclass', 'Age', 'SibSp', 'Parch', 'Fare']
In [257]:
sns.pairplot(df);
In [258]:
fig, axes = plt.subplots(1, 2, figsize=(10, 4))

sns.distplot(df['Age'], ax=axes[0]);
sns.distplot(df['Fare'], ax=axes[1]);

plt.tight_layout()
In [259]:
fig, axes = plt.subplots(2, 3, figsize=(12, 6))

sns.countplot(df['Survived'], ax=axes[0][0]);
sns.countplot(df['Pclass'], ax=axes[0][1]);
sns.countplot(df['Sex'], ax=axes[0][2]);
sns.countplot(df['SibSp'], ax=axes[1][0]);
sns.countplot(df['Parch'], ax=axes[1][1]);
sns.countplot(df['Embarked'], ax=axes[1][2]);

plt.tight_layout()
In [260]:
fig, axes = plt.subplots(1, 3, figsize=(12, 4))

sns.boxplot(df['Age'], ax=axes[0], hue=df['Survived'], orient='v');
sns.countplot(df['Sex'], ax=axes[1], hue=df['Survived']);
sns.countplot(df['Pclass'], ax=axes[2], hue=df['Survived']);

plt.tight_layout()

7.3) Converting Categorical Features (Dummy Encoding)

We'll need to convert categorical features to dummy variables using pandas! Otherwise our machine learning algorithm won't be able to directly take in those features as inputs. We will use pandas pd.get_dummies() method to get dummy numerical columns for each of the category of a categorical feature. Now, for a binary (as well as multiple) categorical column, one category column would be collinear with the other other category column. This would create the multicollinearity issue. In order to avoid it, we will use the drop_first=True method to drop one of the columns.
In [261]:
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 889 entries, 0 to 890
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  889 non-null    int64  
 1   Survived     889 non-null    int64  
 2   Pclass       889 non-null    int64  
 3   Name         889 non-null    object 
 4   Sex          889 non-null    object 
 5   Age          889 non-null    float64
 6   SibSp        889 non-null    int64  
 7   Parch        889 non-null    int64  
 8   Ticket       889 non-null    object 
 9   Fare         889 non-null    float64
 10  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(4)
memory usage: 123.3+ KB
In [262]:
gender = pd.get_dummies(df['Sex'], drop_first=True)
embark = pd.get_dummies(df['Embarked'], drop_first=True)
# pclass = pd.get_dummies(df['Pclass'], drop_first=True)
In [263]:
df.drop(['Sex', 'Embarked','Name','Ticket'], axis=1, inplace=True)
In [264]:
df = pd.concat([df, gender, embark], axis=1)
In [265]:
df.drop('PassengerId', axis=1, inplace=True)
In [266]:
df.head()
Out[266]:
Survived Pclass Age SibSp Parch Fare male Q S
0 0 3 22.0 1 0 7.2500 1 0 1
1 1 1 38.0 1 0 71.2833 0 0 0
2 1 3 26.0 0 0 7.9250 0 0 1
3 1 1 35.0 1 0 53.1000 0 0 1
4 0 3 35.0 0 0 8.0500 1 0 1

7.3) Building a Logistic Regression model

Let's start by splitting our data into a training set and test set.

7.3.1) Train Test Split

In [267]:
X_train, X_test, y_train, y_test = train_test_split(df.drop('Survived',axis=1), df['Survived'], test_size=0.30, random_state=101)

7.3.2) Training and Predicting

In [268]:
logmodel = LogisticRegression()
logmodel.fit(X_train, y_train);
C:\Users\Hrishikesh\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py:762: ConvergenceWarning:

lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression

In [269]:
predictions = logmodel.predict(X_test);

7.3.3) Evaluation

We can check Precision, Recall, F1-score using the Classification Report.
In [270]:
print(classification_report(y_test,predictions))
              precision    recall  f1-score   support

           0       0.83      0.90      0.86       163
           1       0.82      0.71      0.76       104

    accuracy                           0.83       267
   macro avg       0.83      0.81      0.81       267
weighted avg       0.83      0.83      0.83       267

In [271]:
print('The Confusion Matrix is: \n')
matrixc = confusion_matrix(y_test, predictions)
print(matrixc)
matrix = plot_confusion_matrix(logmodel ,X_test, y_test, values_format='d')
matrix.ax_.set_title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.gcf().axes[0].tick_params(color='black')
plt.gcf().set_size_inches(4,3)
plt.show()
The Confusion Matrix is: 

[[147  16]
 [ 30  74]]

**8) Plotly and Cufflinks:-**

In [272]:
# Importing the required libraries:
import cufflinks as cf
cf.go_offline()
In [ ]:
 
In [ ]:
 
In [ ]:
 

**9) K-Nearest Neightbours:-**

We have been given a classified data set from a company! It has hidden feature column names but have given us the data and the target classes. We'll try to use KNN to create a model that directly predicts a class for a new data point based off of the features.
In [16]:
# Importing the required libraries:
from sklearn.model_selection import train_test_split    # For Train-Test split

from sklearn.preprocessing import StandardScaler        # For scaling the dataset
from sklearn.neighbors import KNeighborsClassifier      # The KNN Model

from sklearn.metrics import classification_report       # For Classification Report
from sklearn.metrics import plot_confusion_matrix       # For Plotting Confusion Matrix
from sklearn.metrics import confusion_matrix            # For Confusion Matrix
In [27]:
df = pd.read_csv('KNN_Project_Data-Copy1')
In [28]:
df.head()
Out[28]:
XVPM GWYH TRAT TLLZ IGGA HYKR EDFS GUUB MGJM JHZC TARGET CLASS
0 1636.670614 817.988525 2565.995189 358.347163 550.417491 1618.870897 2147.641254 330.727893 1494.878631 845.136088 0
1 1013.402760 577.587332 2644.141273 280.428203 1161.873391 2084.107872 853.404981 447.157619 1193.032521 861.081809 1
2 1300.035501 820.518697 2025.854469 525.562292 922.206261 2552.355407 818.676686 845.491492 1968.367513 1647.186291 1
3 1059.347542 1066.866418 612.000041 480.827789 419.467495 685.666983 852.867810 341.664784 1154.391368 1450.935357 0
4 1018.340526 1313.679056 950.622661 724.742174 843.065903 1370.554164 905.469453 658.118202 539.459350 1899.850792 0
In [29]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   XVPM          1000 non-null   float64
 1   GWYH          1000 non-null   float64
 2   TRAT          1000 non-null   float64
 3   TLLZ          1000 non-null   float64
 4   IGGA          1000 non-null   float64
 5   HYKR          1000 non-null   float64
 6   EDFS          1000 non-null   float64
 7   GUUB          1000 non-null   float64
 8   MGJM          1000 non-null   float64
 9   JHZC          1000 non-null   float64
 10  TARGET CLASS  1000 non-null   int64  
dtypes: float64(10), int64(1)
memory usage: 86.1 KB
In [30]:
df.describe().transpose()
Out[30]:
count mean std min 25% 50% 75% max
XVPM 1000.0 1055.071157 370.980193 21.17 767.413366 1045.904805 1326.065178 2117.0
GWYH 1000.0 991.851567 392.278890 21.72 694.859326 978.355081 1275.528770 2172.0
TRAT 1000.0 1529.373525 640.286092 31.80 1062.600806 1522.507269 1991.128626 3180.0
TLLZ 1000.0 495.107156 142.789188 8.45 401.788135 500.197421 600.525709 845.0
IGGA 1000.0 940.590072 345.923136 17.93 700.763295 939.348662 1182.578166 1793.0
HYKR 1000.0 1550.637455 493.491988 27.93 1219.267077 1564.996551 1891.937040 2793.0
EDFS 1000.0 1561.003252 598.608517 31.96 1132.097865 1565.882879 1981.739411 3196.0
GUUB 1000.0 561.346117 247.357552 13.52 381.704293 540.420379 725.762027 1352.0
MGJM 1000.0 1089.067338 402.666953 23.21 801.849802 1099.087954 1369.923665 2321.0
JHZC 1000.0 1452.521629 568.132005 30.89 1059.499689 1441.554053 1864.405512 3089.0
TARGET CLASS 1000.0 0.500000 0.500250 0.00 0.000000 0.500000 1.000000 1.0
In [31]:
df.nunique()
Out[31]:
XVPM            1000
GWYH            1000
TRAT            1000
TLLZ            1000
IGGA            1000
HYKR            1000
EDFS            1000
GUUB            1000
MGJM            1000
JHZC            1000
TARGET CLASS       2
dtype: int64
In [32]:
lc = list(df.columns)
In [33]:
fig, axes = plt.subplots(2, 5, figsize=(16, 4))

j = 0
c = 0
for i in range(len(lc)-1):  
    sns.distplot(df[lc[i]], ax=axes[c][j]);
    j += 1
    if i == 4:
        c, j = 1, 0
    
plt.tight_layout()

9.1) Standard Scaler on the variables:

Because the KNN classifier predicts the class of a given test observation by identifying the observations that are nearest to it, the scale of the variables matters. Any variables that are on a large scale will have a much larger effect on the distance between the observations, and hence on the KNN classifier, than variables that are on a small scale.
In [34]:
# Create a StandardScaler:
scaler = StandardScaler()
In [35]:
# Fit the Scaler to the dataset excluding the labels:
scaler.fit(df.drop('TARGET CLASS', axis=1))
Out[35]:
StandardScaler()
In [36]:
# Scale or standardise the dataset:
scaled = scaler.transform(df.drop('TARGET CLASS', axis=1))
In [37]:
# Recreate the DataFrame
df1 = pd.DataFrame(scaled, columns = df.columns[:-1])
df1.head()
Out[37]:
XVPM GWYH TRAT TLLZ IGGA HYKR EDFS GUUB MGJM JHZC
0 1.568522 -0.443435 1.619808 -0.958255 -1.128481 0.138336 0.980493 -0.932794 1.008313 -1.069627
1 -0.112376 -1.056574 1.741918 -1.504220 0.640009 1.081552 -1.182663 -0.461864 0.258321 -1.041546
2 0.660647 -0.436981 0.775793 0.213394 -0.053171 2.030872 -1.240707 1.149298 2.184784 0.342811
3 0.011533 0.191324 -1.433473 -0.100053 -1.507223 -1.753632 -1.183561 -0.888557 0.162310 -0.002793
4 -0.099059 0.820815 -0.904346 1.609015 -0.282065 -0.365099 -1.095644 0.391419 -1.365603 0.787762

9.2) Training the KNN Model:

In [38]:
X_train, X_test, y_train, y_test = train_test_split(df1,df['TARGET CLASS'], test_size=0.30)
In [39]:
knn = KNeighborsClassifier()
In [40]:
knn.fit(X_train, y_train)
Out[40]:
KNeighborsClassifier()

9.3) Predictions and Model Evaluation:

In [41]:
predictions = knn.predict(X_test)
In [42]:
print(classification_report(y_test,predictions))
              precision    recall  f1-score   support

           0       0.79      0.80      0.79       143
           1       0.81      0.81      0.81       157

    accuracy                           0.80       300
   macro avg       0.80      0.80      0.80       300
weighted avg       0.80      0.80      0.80       300

In [43]:
print('The Confusion Matrix is: \n')
matrixc = confusion_matrix(y_test, predictions)
print(matrixc)
matrix = plot_confusion_matrix(knn ,X_test, y_test, values_format='d')
matrix.ax_.set_title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.gcf().axes[0].tick_params(color='black')
plt.gcf().set_size_inches(4,3)
plt.show()
The Confusion Matrix is: 

[[114  29]
 [ 30 127]]

9.4) Choosing a better K value:

We can fine tune our KNN Model using the Elbow method to pick a good K Value.
In [44]:
error_rate = []
d = dict()

for i in range(1, 51):
    knn_c = KNeighborsClassifier(n_neighbors=i)
    knn_c.fit(X_train, y_train)
    pred_i = knn_c.predict(X_test)
    error_rate.append(np.mean(pred_i != y_test))
    d[i] = np.mean(pred_i != y_test)
In [368]:
sns.jointplot(range(1, 51), error_rate, size=5);
C:\Users\Hrishikesh\anaconda3\lib\site-packages\seaborn\axisgrid.py:2264: UserWarning:

The `size` parameter has been renamed to `height`; please update your code.

In [45]:
mn = min(d.values())
for i in d:
    if d[i] == mn:
        print(i,':', mn)
33 : 0.17333333333333334

9.5) Comparing improved performance with K value:

In [46]:
# KNN With K=1
knn = KNeighborsClassifier(n_neighbors=1)

knn.fit(X_train,y_train)
pred = knn.predict(X_test)

print('KNN with K=1:')
print('\n')
print(confusion_matrix(y_test,pred))
print('\n')
print(classification_report(y_test,pred))
print('*******************************************************')

# KNN With K=31
knn = KNeighborsClassifier(n_neighbors=31)

knn.fit(X_train,y_train)
pred = knn.predict(X_test)

print('KNN with K=31:')
print('\n')
print(confusion_matrix(y_test,pred))
print('\n')
print(classification_report(y_test,pred))
KNN with K=1:


[[105  38]
 [ 41 116]]


              precision    recall  f1-score   support

           0       0.72      0.73      0.73       143
           1       0.75      0.74      0.75       157

    accuracy                           0.74       300
   macro avg       0.74      0.74      0.74       300
weighted avg       0.74      0.74      0.74       300

*******************************************************
KNN with K=31:


[[114  29]
 [ 25 132]]


              precision    recall  f1-score   support

           0       0.82      0.80      0.81       143
           1       0.82      0.84      0.83       157

    accuracy                           0.82       300
   macro avg       0.82      0.82      0.82       300
weighted avg       0.82      0.82      0.82       300

**10) Decision Trees and Random Forest:-**

Random Forest is one of the most common ensemble methods, which consists of a collection of Decision Trees. Random Forests are an example of an ensemble method, in which we combine multiple machine learning algorithms to obtain better predictive performance. We’ll run multiple models on the data and use the aggregate predictions, which will be better than a single model alone.
In [371]:
# Importing the required libraries:
from sklearn.model_selection import train_test_split    # For Train-Test split

from sklearn.tree import DecisionTreeClassifier         # For Decision Trees
from sklearn.ensemble import RandomForestClassifier     # For Random Forest

from sklearn.metrics import classification_report       # For Classification Report
from sklearn.metrics import plot_confusion_matrix       # For Plotting Confusion Matrix
from sklearn.metrics import confusion_matrix            # For Confusion Matrix
In [372]:
df = pd.read_csv('kyphosis-Copy1.csv')
df.head()
Out[372]:
Kyphosis Age Number Start
0 absent 71 3 5
1 absent 158 3 14
2 present 128 4 5
3 absent 2 5 1
4 absent 1 4 15
In [373]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 81 entries, 0 to 80
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Kyphosis  81 non-null     object
 1   Age       81 non-null     int64 
 2   Number    81 non-null     int64 
 3   Start     81 non-null     int64 
dtypes: int64(3), object(1)
memory usage: 2.7+ KB
In [374]:
df.describe().transpose()
Out[374]:
count mean std min 25% 50% 75% max
Age 81.0 83.654321 58.104251 1.0 26.0 87.0 130.0 206.0
Number 81.0 4.049383 1.619423 2.0 3.0 4.0 5.0 10.0
Start 81.0 11.493827 4.883962 1.0 9.0 13.0 16.0 18.0
In [375]:
# Columns having Numerical data are
numeric_features = df.select_dtypes(include=[np.number])
print(list(numeric_features.columns))
['Age', 'Number', 'Start']
In [376]:
sns.pairplot(df, hue='Kyphosis');
In [377]:
df.nunique()
Out[377]:
Kyphosis     2
Age         64
Number       8
Start       16
dtype: int64
In [378]:
X = df[['Age', 'Number', 'Start']]
y = df['Kyphosis']

10.1) Training the Decision Tree Model

In [379]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)
In [380]:
dtree = DecisionTreeClassifier()
In [381]:
dtree.fit(X_train, y_train)
Out[381]:
DecisionTreeClassifier()
In [382]:
predictions = dtree.predict(X_test)
In [385]:
print(classification_report(y_test,predictions))
print('The Confusion Matrix is: \n')
matrixc = confusion_matrix(y_test, predictions)
print(matrixc)
              precision    recall  f1-score   support

      absent       0.76      0.89      0.82        18
     present       0.50      0.29      0.36         7

    accuracy                           0.72        25
   macro avg       0.63      0.59      0.59        25
weighted avg       0.69      0.72      0.69        25

The Confusion Matrix is: 

[[16  2]
 [ 5  2]]

10.2) Training the Random Forest Model

In [393]:
rfc = RandomForestClassifier(n_estimators=100)
In [394]:
rfc.fit(X_train, y_train)
Out[394]:
RandomForestClassifier()
In [396]:
rfc_predictions = rfc.predict(X_test)
In [397]:
print(classification_report(y_test, rfc_predictions))
print('The Confusion Matrix is: \n')
matrixc = confusion_matrix(y_test, rfc_predictions)
print(matrixc)
              precision    recall  f1-score   support

      absent       0.77      0.94      0.85        18
     present       0.67      0.29      0.40         7

    accuracy                           0.76        25
   macro avg       0.72      0.62      0.62        25
weighted avg       0.74      0.76      0.72        25

The Confusion Matrix is: 

[[17  1]
 [ 5  2]]

**11) Support Vector Machines (SVM):-**

In [65]:
# Importing the required libraries:
from sklearn.model_selection import train_test_split    # For Train-Test split

from sklearn.svm import SVC                             # The Support Vector Classifier
from sklearn.model_selection import GridSearchCV        # For Grid Search

from sklearn.metrics import classification_report       # For Classification Report
from sklearn.metrics import plot_confusion_matrix       # For Plotting Confusion Matrix
from sklearn.metrics import confusion_matrix            # For Confusion Matrix
In [2]:
from sklearn.datasets import load_breast_cancer
In [3]:
cancer = load_breast_cancer()
In [4]:
print(cancer['DESCR'])
.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        worst/largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 0 is Mean Radius, field
        10 is Radius SE, field 20 is Worst Radius.

        - class:
                - WDBC-Malignant
                - WDBC-Benign

    :Summary Statistics:

    ===================================== ====== ======
                                           Min    Max
    ===================================== ====== ======
    radius (mean):                        6.981  28.11
    texture (mean):                       9.71   39.28
    perimeter (mean):                     43.79  188.5
    area (mean):                          143.5  2501.0
    smoothness (mean):                    0.053  0.163
    compactness (mean):                   0.019  0.345
    concavity (mean):                     0.0    0.427
    concave points (mean):                0.0    0.201
    symmetry (mean):                      0.106  0.304
    fractal dimension (mean):             0.05   0.097
    radius (standard error):              0.112  2.873
    texture (standard error):             0.36   4.885
    perimeter (standard error):           0.757  21.98
    area (standard error):                6.802  542.2
    smoothness (standard error):          0.002  0.031
    compactness (standard error):         0.002  0.135
    concavity (standard error):           0.0    0.396
    concave points (standard error):      0.0    0.053
    symmetry (standard error):            0.008  0.079
    fractal dimension (standard error):   0.001  0.03
    radius (worst):                       7.93   36.04
    texture (worst):                      12.02  49.54
    perimeter (worst):                    50.41  251.2
    area (worst):                         185.2  4254.0
    smoothness (worst):                   0.071  0.223
    compactness (worst):                  0.027  1.058
    concavity (worst):                    0.0    1.252
    concave points (worst):               0.0    0.291
    symmetry (worst):                     0.156  0.664
    fractal dimension (worst):            0.055  0.208
    ===================================== ====== ======

    :Missing Attribute Values: None

    :Class Distribution: 212 - Malignant, 357 - Benign

    :Creator:  Dr. William H. Wolberg, W. Nick Street, Olvi L. Mangasarian

    :Donor: Nick Street

    :Date: November, 1995

This is a copy of UCI ML Breast Cancer Wisconsin (Diagnostic) datasets.
https://goo.gl/U2Uwz2

Features are computed from a digitized image of a fine needle
aspirate (FNA) of a breast mass.  They describe
characteristics of the cell nuclei present in the image.

Separating plane described above was obtained using
Multisurface Method-Tree (MSM-T) [K. P. Bennett, "Decision Tree
Construction Via Linear Programming." Proceedings of the 4th
Midwest Artificial Intelligence and Cognitive Science Society,
pp. 97-101, 1992], a classification method which uses linear
programming to construct a decision tree.  Relevant features
were selected using an exhaustive search in the space of 1-4
features and 1-3 separating planes.

The actual linear program used to obtain the separating plane
in the 3-dimensional space is that described in:
[K. P. Bennett and O. L. Mangasarian: "Robust Linear
Programming Discrimination of Two Linearly Inseparable Sets",
Optimization Methods and Software 1, 1992, 23-34].

This database is also available through the UW CS ftp server:

ftp ftp.cs.wisc.edu
cd math-prog/cpo-dataset/machine-learn/WDBC/

.. topic:: References

   - W.N. Street, W.H. Wolberg and O.L. Mangasarian. Nuclear feature extraction 
     for breast tumor diagnosis. IS&T/SPIE 1993 International Symposium on 
     Electronic Imaging: Science and Technology, volume 1905, pages 861-870,
     San Jose, CA, 1993.
   - O.L. Mangasarian, W.N. Street and W.H. Wolberg. Breast cancer diagnosis and 
     prognosis via linear programming. Operations Research, 43(4), pages 570-577, 
     July-August 1995.
   - W.H. Wolberg, W.N. Street, and O.L. Mangasarian. Machine learning techniques
     to diagnose breast cancer from fine-needle aspirates. Cancer Letters 77 (1994) 
     163-171.
In [13]:
# The main features DataFrame
df = pd.DataFrame(cancer['data'],columns=cancer['feature_names'])
df.head()
Out[13]:
mean radius mean texture mean perimeter mean area mean smoothness mean compactness mean concavity mean concave points mean symmetry mean fractal dimension ... worst radius worst texture worst perimeter worst area worst smoothness worst compactness worst concavity worst concave points worst symmetry worst fractal dimension
0 17.99 10.38 122.80 1001.0 0.11840 0.27760 0.3001 0.14710 0.2419 0.07871 ... 25.38 17.33 184.60 2019.0 0.1622 0.6656 0.7119 0.2654 0.4601 0.11890
1 20.57 17.77 132.90 1326.0 0.08474 0.07864 0.0869 0.07017 0.1812 0.05667 ... 24.99 23.41 158.80 1956.0 0.1238 0.1866 0.2416 0.1860 0.2750 0.08902
2 19.69 21.25 130.00 1203.0 0.10960 0.15990 0.1974 0.12790 0.2069 0.05999 ... 23.57 25.53 152.50 1709.0 0.1444 0.4245 0.4504 0.2430 0.3613 0.08758
3 11.42 20.38 77.58 386.1 0.14250 0.28390 0.2414 0.10520 0.2597 0.09744 ... 14.91 26.50 98.87 567.7 0.2098 0.8663 0.6869 0.2575 0.6638 0.17300
4 20.29 14.34 135.10 1297.0 0.10030 0.13280 0.1980 0.10430 0.1809 0.05883 ... 22.54 16.67 152.20 1575.0 0.1374 0.2050 0.4000 0.1625 0.2364 0.07678

5 rows × 30 columns

In [17]:
# The prediction labels DataFrame
df_target = pd.DataFrame(cancer['target'],columns=['Cancer'])
In [14]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 30 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   mean radius              569 non-null    float64
 1   mean texture             569 non-null    float64
 2   mean perimeter           569 non-null    float64
 3   mean area                569 non-null    float64
 4   mean smoothness          569 non-null    float64
 5   mean compactness         569 non-null    float64
 6   mean concavity           569 non-null    float64
 7   mean concave points      569 non-null    float64
 8   mean symmetry            569 non-null    float64
 9   mean fractal dimension   569 non-null    float64
 10  radius error             569 non-null    float64
 11  texture error            569 non-null    float64
 12  perimeter error          569 non-null    float64
 13  area error               569 non-null    float64
 14  smoothness error         569 non-null    float64
 15  compactness error        569 non-null    float64
 16  concavity error          569 non-null    float64
 17  concave points error     569 non-null    float64
 18  symmetry error           569 non-null    float64
 19  fractal dimension error  569 non-null    float64
 20  worst radius             569 non-null    float64
 21  worst texture            569 non-null    float64
 22  worst perimeter          569 non-null    float64
 23  worst area               569 non-null    float64
 24  worst smoothness         569 non-null    float64
 25  worst compactness        569 non-null    float64
 26  worst concavity          569 non-null    float64
 27  worst concave points     569 non-null    float64
 28  worst symmetry           569 non-null    float64
 29  worst fractal dimension  569 non-null    float64
dtypes: float64(30)
memory usage: 133.5 KB
In [15]:
df.describe().transpose()
Out[15]:
count mean std min 25% 50% 75% max
mean radius 569.0 14.127292 3.524049 6.981000 11.700000 13.370000 15.780000 28.11000
mean texture 569.0 19.289649 4.301036 9.710000 16.170000 18.840000 21.800000 39.28000
mean perimeter 569.0 91.969033 24.298981 43.790000 75.170000 86.240000 104.100000 188.50000
mean area 569.0 654.889104 351.914129 143.500000 420.300000 551.100000 782.700000 2501.00000
mean smoothness 569.0 0.096360 0.014064 0.052630 0.086370 0.095870 0.105300 0.16340
mean compactness 569.0 0.104341 0.052813 0.019380 0.064920 0.092630 0.130400 0.34540
mean concavity 569.0 0.088799 0.079720 0.000000 0.029560 0.061540 0.130700 0.42680
mean concave points 569.0 0.048919 0.038803 0.000000 0.020310 0.033500 0.074000 0.20120
mean symmetry 569.0 0.181162 0.027414 0.106000 0.161900 0.179200 0.195700 0.30400
mean fractal dimension 569.0 0.062798 0.007060 0.049960 0.057700 0.061540 0.066120 0.09744
radius error 569.0 0.405172 0.277313 0.111500 0.232400 0.324200 0.478900 2.87300
texture error 569.0 1.216853 0.551648 0.360200 0.833900 1.108000 1.474000 4.88500
perimeter error 569.0 2.866059 2.021855 0.757000 1.606000 2.287000 3.357000 21.98000
area error 569.0 40.337079 45.491006 6.802000 17.850000 24.530000 45.190000 542.20000
smoothness error 569.0 0.007041 0.003003 0.001713 0.005169 0.006380 0.008146 0.03113
compactness error 569.0 0.025478 0.017908 0.002252 0.013080 0.020450 0.032450 0.13540
concavity error 569.0 0.031894 0.030186 0.000000 0.015090 0.025890 0.042050 0.39600
concave points error 569.0 0.011796 0.006170 0.000000 0.007638 0.010930 0.014710 0.05279
symmetry error 569.0 0.020542 0.008266 0.007882 0.015160 0.018730 0.023480 0.07895
fractal dimension error 569.0 0.003795 0.002646 0.000895 0.002248 0.003187 0.004558 0.02984
worst radius 569.0 16.269190 4.833242 7.930000 13.010000 14.970000 18.790000 36.04000
worst texture 569.0 25.677223 6.146258 12.020000 21.080000 25.410000 29.720000 49.54000
worst perimeter 569.0 107.261213 33.602542 50.410000 84.110000 97.660000 125.400000 251.20000
worst area 569.0 880.583128 569.356993 185.200000 515.300000 686.500000 1084.000000 4254.00000
worst smoothness 569.0 0.132369 0.022832 0.071170 0.116600 0.131300 0.146000 0.22260
worst compactness 569.0 0.254265 0.157336 0.027290 0.147200 0.211900 0.339100 1.05800
worst concavity 569.0 0.272188 0.208624 0.000000 0.114500 0.226700 0.382900 1.25200
worst concave points 569.0 0.114606 0.065732 0.000000 0.064930 0.099930 0.161400 0.29100
worst symmetry 569.0 0.290076 0.061867 0.156500 0.250400 0.282200 0.317900 0.66380
worst fractal dimension 569.0 0.083946 0.018061 0.055040 0.071460 0.080040 0.092080 0.20750
In [ ]:
# sns.pairplot(df);

11.1) Training the SVC Model

In [21]:
X_train, X_test, y_train, y_test = train_test_split(df, cancer['target'], test_size=0.30, random_state=101)
In [24]:
svcmodel = SVC()
In [25]:
svcmodel.fit(X_train, y_train)
Out[25]:
SVC()
In [26]:
predictions = svc.predict(X_test)
In [27]:
print(classification_report(y_test,predictions))
print('The Confusion Matrix is: \n')
matrixc = confusion_matrix(y_test, predictions)
print(matrixc)
              precision    recall  f1-score   support

           0       0.95      0.85      0.90        66
           1       0.91      0.97      0.94       105

    accuracy                           0.92       171
   macro avg       0.93      0.91      0.92       171
weighted avg       0.93      0.92      0.92       171

The Confusion Matrix is: 

[[ 56  10]
 [  3 102]]

11.2) Gridsearch

Finding the right parameters (like what C or gamma values to use) is a tricky task! But luckily, we can be a little lazy and just try a bunch of combinations and see what works best! This idea of creating a 'grid' of parameters and just trying out all the possible combinations is called a Gridsearch, this method is common enough that Scikit-learn has this functionality built in with GridSearchCV! The CV stands for cross-validation which is the GridSearchCV takes a dictionary that describes the parameters that should be tried and a model to train. The grid of parameters is defined as a dictionary, where the keys are the parameters and the values are the settings to be tested.
In [29]:
param_grid = {'C': [0.1,1, 10, 100, 1000], 'gamma': [1,0.1,0.01,0.001,0.0001], 'kernel': ['rbf']} 
In [30]:
from sklearn.model_selection import GridSearchCV
One of the great things about GridSearchCV is that it is a meta-estimator. It takes an estimator like SVC, and creates a new estimator, that behaves exactly the same - in this case, like a classifier. You should add refit=True and choose verbose to whatever number you want, higher the number, the more verbose (verbose just means the text output describing the process).
In [31]:
grid = GridSearchCV(SVC(),param_grid,refit=True,verbose=3)
What fit does is a bit more involved then usual. First, it runs the same loop with cross-validation, to find the best parameter combination. Once it has the best combination, it runs fit again on all data passed to fit (without cross-validation), to built a single new model using the best parameter setting.
In [32]:
# May take a while!
grid.fit(X_train,y_train)
Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.637, total=   0.0s
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.637, total=   0.0s
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.625, total=   0.0s
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.633, total=   0.0s
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.633, total=   0.0s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.637, total=   0.0s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.637, total=   0.0s
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.625, total=   0.0s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.633, total=   0.1s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.633, total=   0.0s
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=0.1, gamma=0.01, kernel=rbf, score=0.637, total=   0.0s
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=0.1, gamma=0.01, kernel=rbf, score=0.637, total=   0.0s
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=0.1, gamma=0.01, kernel=rbf, score=0.625, total=   0.0s
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=0.1, gamma=0.01, kernel=rbf, score=0.633, total=   0.0s
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=0.1, gamma=0.01, kernel=rbf, score=0.633, total=   0.0s
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................
[CV] ...... C=0.1, gamma=0.001, kernel=rbf, score=0.637, total=   0.0s
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................
[CV] ...... C=0.1, gamma=0.001, kernel=rbf, score=0.637, total=   0.0s
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................
[CV] ...... C=0.1, gamma=0.001, kernel=rbf, score=0.625, total=   0.0s
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................
[CV] ...... C=0.1, gamma=0.001, kernel=rbf, score=0.633, total=   0.0s
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................
[CV] ...... C=0.1, gamma=0.001, kernel=rbf, score=0.633, total=   0.0s
[CV] C=0.1, gamma=0.0001, kernel=rbf .................................
[CV] ..... C=0.1, gamma=0.0001, kernel=rbf, score=0.887, total=   0.0s
[CV] C=0.1, gamma=0.0001, kernel=rbf .................................
[CV] ..... C=0.1, gamma=0.0001, kernel=rbf, score=0.938, total=   0.0s
[CV] C=0.1, gamma=0.0001, kernel=rbf .................................
[CV] ..... C=0.1, gamma=0.0001, kernel=rbf, score=0.963, total=   0.0s
[CV] C=0.1, gamma=0.0001, kernel=rbf .................................
[CV] ..... C=0.1, gamma=0.0001, kernel=rbf, score=0.962, total=   0.0s
[CV] C=0.1, gamma=0.0001, kernel=rbf .................................
[CV] ..... C=0.1, gamma=0.0001, kernel=rbf, score=0.886, total=   0.0s
[CV] C=1, gamma=1, kernel=rbf ........................................
[CV] ............ C=1, gamma=1, kernel=rbf, score=0.637, total=   0.0s
[CV] C=1, gamma=1, kernel=rbf ........................................
[CV] ............ C=1, gamma=1, kernel=rbf, score=0.637, total=   0.0s
[CV] C=1, gamma=1, kernel=rbf ........................................
[CV] ............ C=1, gamma=1, kernel=rbf, score=0.625, total=   0.0s
[CV] C=1, gamma=1, kernel=rbf ........................................
[CV] ............ C=1, gamma=1, kernel=rbf, score=0.633, total=   0.0s
[CV] C=1, gamma=1, kernel=rbf ........................................
[CV] ............ C=1, gamma=1, kernel=rbf, score=0.633, total=   0.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] .......... C=1, gamma=0.1, kernel=rbf, score=0.637, total=   0.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] .......... C=1, gamma=0.1, kernel=rbf, score=0.637, total=   0.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] .......... C=1, gamma=0.1, kernel=rbf, score=0.625, total=   0.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] .......... C=1, gamma=0.1, kernel=rbf, score=0.633, total=   0.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] .......... C=1, gamma=0.1, kernel=rbf, score=0.633, total=   0.0s
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] ......... C=1, gamma=0.01, kernel=rbf, score=0.637, total=   0.0s
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] ......... C=1, gamma=0.01, kernel=rbf, score=0.637, total=   0.0s
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] ......... C=1, gamma=0.01, kernel=rbf, score=0.625, total=   0.0s
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] ......... C=1, gamma=0.01, kernel=rbf, score=0.633, total=   0.0s
[CV] C=1, gamma=0.01, kernel=rbf .....................................
[CV] ......... C=1, gamma=0.01, kernel=rbf, score=0.633, total=   0.0s
[CV] C=1, gamma=0.001, kernel=rbf ....................................
[CV] ........ C=1, gamma=0.001, kernel=rbf, score=0.900, total=   0.0s
[CV] C=1, gamma=0.001, kernel=rbf ....................................
[CV] ........ C=1, gamma=0.001, kernel=rbf, score=0.912, total=   0.0s
[CV] C=1, gamma=0.001, kernel=rbf ....................................
[CV] ........ C=1, gamma=0.001, kernel=rbf, score=0.925, total=   0.0s
[CV] C=1, gamma=0.001, kernel=rbf ....................................
[CV] ........ C=1, gamma=0.001, kernel=rbf, score=0.962, total=   0.0s
[CV] C=1, gamma=0.001, kernel=rbf ....................................
[CV] ........ C=1, gamma=0.001, kernel=rbf, score=0.937, total=   0.0s
[CV] C=1, gamma=0.0001, kernel=rbf ...................................
[CV] ....... C=1, gamma=0.0001, kernel=rbf, score=0.912, total=   0.0s
[CV] C=1, gamma=0.0001, kernel=rbf ...................................
[CV] ....... C=1, gamma=0.0001, kernel=rbf, score=0.950, total=   0.0s
[CV] C=1, gamma=0.0001, kernel=rbf ...................................
[CV] ....... C=1, gamma=0.0001, kernel=rbf, score=0.975, total=   0.0s
[CV] C=1, gamma=0.0001, kernel=rbf ...................................
[CV] ....... C=1, gamma=0.0001, kernel=rbf, score=0.962, total=   0.0s
[CV] C=1, gamma=0.0001, kernel=rbf ...................................
[CV] ....... C=1, gamma=0.0001, kernel=rbf, score=0.937, total=   0.0s
[CV] C=10, gamma=1, kernel=rbf .......................................
[CV] ........... C=10, gamma=1, kernel=rbf, score=0.637, total=   0.0s
[CV] C=10, gamma=1, kernel=rbf .......................................
[CV] ........... C=10, gamma=1, kernel=rbf, score=0.637, total=   0.0s
[CV] C=10, gamma=1, kernel=rbf .......................................
[CV] ........... C=10, gamma=1, kernel=rbf, score=0.625, total=   0.0s
[CV] C=10, gamma=1, kernel=rbf .......................................
[CV] ........... C=10, gamma=1, kernel=rbf, score=0.633, total=   0.0s
[CV] C=10, gamma=1, kernel=rbf .......................................
[CV] ........... C=10, gamma=1, kernel=rbf, score=0.633, total=   0.0s
[CV] C=10, gamma=0.1, kernel=rbf .....................................
[CV] ......... C=10, gamma=0.1, kernel=rbf, score=0.637, total=   0.0s
[CV] C=10, gamma=0.1, kernel=rbf .....................................
[CV] ......... C=10, gamma=0.1, kernel=rbf, score=0.637, total=   0.0s
[CV] C=10, gamma=0.1, kernel=rbf .....................................
[CV] ......... C=10, gamma=0.1, kernel=rbf, score=0.625, total=   0.0s
[CV] C=10, gamma=0.1, kernel=rbf .....................................
[CV] ......... C=10, gamma=0.1, kernel=rbf, score=0.633, total=   0.0s
[CV] C=10, gamma=0.1, kernel=rbf .....................................
[CV] ......... C=10, gamma=0.1, kernel=rbf, score=0.633, total=   0.0s
[CV] C=10, gamma=0.01, kernel=rbf ....................................
[CV] ........ C=10, gamma=0.01, kernel=rbf, score=0.637, total=   0.0s
[CV] C=10, gamma=0.01, kernel=rbf ....................................
[CV] ........ C=10, gamma=0.01, kernel=rbf, score=0.637, total=   0.0s
[CV] C=10, gamma=0.01, kernel=rbf ....................................
[CV] ........ C=10, gamma=0.01, kernel=rbf, score=0.613, total=   0.0s
[CV] C=10, gamma=0.01, kernel=rbf ....................................
[CV] ........ C=10, gamma=0.01, kernel=rbf, score=0.633, total=   0.0s
[CV] C=10, gamma=0.01, kernel=rbf ....................................
[CV] ........ C=10, gamma=0.01, kernel=rbf, score=0.633, total=   0.0s
[CV] C=10, gamma=0.001, kernel=rbf ...................................
[CV] ....... C=10, gamma=0.001, kernel=rbf, score=0.887, total=   0.0s
[CV] C=10, gamma=0.001, kernel=rbf ...................................
[CV] ....... C=10, gamma=0.001, kernel=rbf, score=0.912, total=   0.0s
[CV] C=10, gamma=0.001, kernel=rbf ...................................
[CV] ....... C=10, gamma=0.001, kernel=rbf, score=0.900, total=   0.0s
[CV] C=10, gamma=0.001, kernel=rbf ...................................
[CV] ....... C=10, gamma=0.001, kernel=rbf, score=0.937, total=   0.0s
[CV] C=10, gamma=0.001, kernel=rbf ...................................
[CV] ....... C=10, gamma=0.001, kernel=rbf, score=0.924, total=   0.0s
[CV] C=10, gamma=0.0001, kernel=rbf ..................................
[CV] ...... C=10, gamma=0.0001, kernel=rbf, score=0.950, total=   0.0s
[CV] C=10, gamma=0.0001, kernel=rbf ..................................
[CV] ...... C=10, gamma=0.0001, kernel=rbf, score=0.912, total=   0.0s
[CV] C=10, gamma=0.0001, kernel=rbf ..................................
[CV] ...... C=10, gamma=0.0001, kernel=rbf, score=0.975, total=   0.0s
[CV] C=10, gamma=0.0001, kernel=rbf ..................................
[CV] ...... C=10, gamma=0.0001, kernel=rbf, score=0.949, total=   0.0s
[CV] C=10, gamma=0.0001, kernel=rbf ..................................
[CV] ...... C=10, gamma=0.0001, kernel=rbf, score=0.949, total=   0.0s
[CV] C=100, gamma=1, kernel=rbf ......................................
[CV] .......... C=100, gamma=1, kernel=rbf, score=0.637, total=   0.0s
[CV] C=100, gamma=1, kernel=rbf ......................................
[CV] .......... C=100, gamma=1, kernel=rbf, score=0.637, total=   0.0s
[CV] C=100, gamma=1, kernel=rbf ......................................
[CV] .......... C=100, gamma=1, kernel=rbf, score=0.625, total=   0.0s
[CV] C=100, gamma=1, kernel=rbf ......................................
[CV] .......... C=100, gamma=1, kernel=rbf, score=0.633, total=   0.0s
[CV] C=100, gamma=1, kernel=rbf ......................................
[CV] .......... C=100, gamma=1, kernel=rbf, score=0.633, total=   0.0s
[CV] C=100, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=100, gamma=0.1, kernel=rbf, score=0.637, total=   0.0s
[CV] C=100, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=100, gamma=0.1, kernel=rbf, score=0.637, total=   0.0s
[CV] C=100, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=100, gamma=0.1, kernel=rbf, score=0.625, total=   0.0s
[CV] C=100, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=100, gamma=0.1, kernel=rbf, score=0.633, total=   0.0s
[CV] C=100, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=100, gamma=0.1, kernel=rbf, score=0.633, total=   0.0s
[CV] C=100, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=100, gamma=0.01, kernel=rbf, score=0.637, total=   0.0s
[CV] C=100, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=100, gamma=0.01, kernel=rbf, score=0.637, total=   0.0s
[CV] C=100, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=100, gamma=0.01, kernel=rbf, score=0.613, total=   0.0s
[CV] C=100, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=100, gamma=0.01, kernel=rbf, score=0.633, total=   0.0s
[CV] C=100, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=100, gamma=0.01, kernel=rbf, score=0.633, total=   0.0s
[CV] C=100, gamma=0.001, kernel=rbf ..................................
[CV] ...... C=100, gamma=0.001, kernel=rbf, score=0.887, total=   0.0s
[CV] C=100, gamma=0.001, kernel=rbf ..................................
[CV] ...... C=100, gamma=0.001, kernel=rbf, score=0.912, total=   0.0s
[CV] C=100, gamma=0.001, kernel=rbf ..................................
[CV] ...... C=100, gamma=0.001, kernel=rbf, score=0.900, total=   0.0s
[CV] C=100, gamma=0.001, kernel=rbf ..................................
[CV] ...... C=100, gamma=0.001, kernel=rbf, score=0.937, total=   0.0s
[CV] C=100, gamma=0.001, kernel=rbf ..................................
[CV] ...... C=100, gamma=0.001, kernel=rbf, score=0.924, total=   0.0s
[CV] C=100, gamma=0.0001, kernel=rbf .................................
[CV] ..... C=100, gamma=0.0001, kernel=rbf, score=0.925, total=   0.0s
[CV] C=100, gamma=0.0001, kernel=rbf .................................
[CV] ..... C=100, gamma=0.0001, kernel=rbf, score=0.912, total=   0.0s
[CV] C=100, gamma=0.0001, kernel=rbf .................................
[CV] ..... C=100, gamma=0.0001, kernel=rbf, score=0.975, total=   0.0s
[CV] C=100, gamma=0.0001, kernel=rbf .................................
[CV] ..... C=100, gamma=0.0001, kernel=rbf, score=0.937, total=   0.0s
[CV] C=100, gamma=0.0001, kernel=rbf .................................
[CV] ..... C=100, gamma=0.0001, kernel=rbf, score=0.949, total=   0.0s
[CV] C=1000, gamma=1, kernel=rbf .....................................
[CV] ......... C=1000, gamma=1, kernel=rbf, score=0.637, total=   0.0s
[CV] C=1000, gamma=1, kernel=rbf .....................................
[CV] ......... C=1000, gamma=1, kernel=rbf, score=0.637, total=   0.0s
[CV] C=1000, gamma=1, kernel=rbf .....................................
[CV] ......... C=1000, gamma=1, kernel=rbf, score=0.625, total=   0.0s
[CV] C=1000, gamma=1, kernel=rbf .....................................
[CV] ......... C=1000, gamma=1, kernel=rbf, score=0.633, total=   0.0s
[CV] C=1000, gamma=1, kernel=rbf .....................................
[CV] ......... C=1000, gamma=1, kernel=rbf, score=0.633, total=   0.0s
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.637, total=   0.0s
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.637, total=   0.0s
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.625, total=   0.0s
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.633, total=   0.0s
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.633, total=   0.0s
[CV] C=1000, gamma=0.01, kernel=rbf ..................................
[CV] ...... C=1000, gamma=0.01, kernel=rbf, score=0.637, total=   0.0s
[CV] C=1000, gamma=0.01, kernel=rbf ..................................
[CV] ...... C=1000, gamma=0.01, kernel=rbf, score=0.637, total=   0.0s
[CV] C=1000, gamma=0.01, kernel=rbf ..................................
[CV] ...... C=1000, gamma=0.01, kernel=rbf, score=0.613, total=   0.0s
[CV] C=1000, gamma=0.01, kernel=rbf ..................................
[CV] ...... C=1000, gamma=0.01, kernel=rbf, score=0.633, total=   0.0s
[CV] C=1000, gamma=0.01, kernel=rbf ..................................
[CV] ...... C=1000, gamma=0.01, kernel=rbf, score=0.633, total=   0.0s
[CV] C=1000, gamma=0.001, kernel=rbf .................................
[CV] ..... C=1000, gamma=0.001, kernel=rbf, score=0.887, total=   0.0s
[CV] C=1000, gamma=0.001, kernel=rbf .................................
[CV] ..... C=1000, gamma=0.001, kernel=rbf, score=0.912, total=   0.0s
[CV] C=1000, gamma=0.001, kernel=rbf .................................
[CV] ..... C=1000, gamma=0.001, kernel=rbf, score=0.900, total=   0.0s
[CV] C=1000, gamma=0.001, kernel=rbf .................................
[CV] ..... C=1000, gamma=0.001, kernel=rbf, score=0.937, total=   0.0s
[CV] C=1000, gamma=0.001, kernel=rbf .................................
[CV] ..... C=1000, gamma=0.001, kernel=rbf, score=0.924, total=   0.0s
[CV] C=1000, gamma=0.0001, kernel=rbf ................................
[CV] .... C=1000, gamma=0.0001, kernel=rbf, score=0.938, total=   0.0s
[CV] C=1000, gamma=0.0001, kernel=rbf ................................
[CV] .... C=1000, gamma=0.0001, kernel=rbf, score=0.912, total=   0.0s
[CV] C=1000, gamma=0.0001, kernel=rbf ................................
[CV] .... C=1000, gamma=0.0001, kernel=rbf, score=0.963, total=   0.0s
[CV] C=1000, gamma=0.0001, kernel=rbf ................................
[CV] .... C=1000, gamma=0.0001, kernel=rbf, score=0.924, total=   0.0s
[CV] C=1000, gamma=0.0001, kernel=rbf ................................
[CV] .... C=1000, gamma=0.0001, kernel=rbf, score=0.962, total=   0.0s
[Parallel(n_jobs=1)]: Done 125 out of 125 | elapsed:    2.3s finished
Out[32]:
GridSearchCV(estimator=SVC(),
             param_grid={'C': [0.1, 1, 10, 100, 1000],
                         'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
                         'kernel': ['rbf']},
             verbose=3)
You can inspect the best parameters found by GridSearchCV in the best_params_ attribute, and the best estimator in the best_estimator_ attribute:
In [33]:
grid.best_params_
Out[33]:
{'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
In [34]:
grid.best_estimator_
Out[34]:
SVC(C=1, gamma=0.0001)
Then you can re-run predictions on this grid object just like you would with a normal model.
In [35]:
grid_predictions = grid.predict(X_test)
In [36]:
print(confusion_matrix(y_test,grid_predictions))
[[ 59   7]
 [  4 101]]
In [37]:
print(classification_report(y_test,grid_predictions))
              precision    recall  f1-score   support

           0       0.94      0.89      0.91        66
           1       0.94      0.96      0.95       105

    accuracy                           0.94       171
   macro avg       0.94      0.93      0.93       171
weighted avg       0.94      0.94      0.94       171

**12) K Means Clustering:-**

K Means Clustering is an unsupervised learning algorithm that tries to cluster data based on their similarity. Unsupervised learning means that there is no outcome to be predicted, and the algorithm just tries to find patterns in the data. In k means clustering, we have the specify the number of clusters we want the data to be grouped into. The algorithm randomly assigns each observation to a cluster, and finds the centroid of each cluster. Then, the algorithm iterates through two steps: Reassign data points to the cluster whose centroid is closest. Calculate new centroid of each cluster. These two steps are repeated till the within cluster variation cannot be reduced any further. The within cluster variation is calculated as the sum of the euclidean distance between the data points and their respective cluster centroids.
In [39]:
# Importing the required libraries:
from sklearn.model_selection import train_test_split    # For Train-Test split

from sklearn.cluster import KMeans                      # For KMeans Clustering

from sklearn.metrics import classification_report       # For Classification Report
from sklearn.metrics import plot_confusion_matrix       # For Plotting Confusion Matrix
from sklearn.metrics import confusion_matrix            # For Confusion Matrix
In [38]:
from sklearn.datasets import make_blobs
In [51]:
# Creating a Random Clustered dataset
data = make_blobs(n_samples=2000, n_features=2, centers=4, cluster_std=1.8,random_state=101)

12.1) EDA:

In [52]:
plt.scatter(data[0][:,0],data[0][:,1],c=data[1],cmap='rainbow');

12.2) Creating the Clusters Model:

In [53]:
kmeans = KMeans(n_clusters=4)
In [54]:
# Fit the Model to the features
kmeans.fit(data[0])
Out[54]:
KMeans(n_clusters=4)
In [55]:
kmeans.cluster_centers_
Out[55]:
array([[-3.8395076 ,  7.87028684],
       [-9.34438237, -6.48824207],
       [ 3.89770507,  6.81712967],
       [ 0.44949255,  1.32426285]])
In [56]:
kmeans.labels_
Out[56]:
array([1, 3, 1, ..., 1, 0, 0])
In [57]:
f, (ax1, ax2) = plt.subplots(1, 2, sharey=True,figsize=(10,6))

ax1.set_title('K Means')
ax1.scatter(data[0][:,0],data[0][:,1],c=kmeans.labels_,cmap='rainbow');
ax2.set_title("Original")
ax2.scatter(data[0][:,0],data[0][:,1],c=data[1],cmap='rainbow');

**13) Principal Component Analysis:-**

In [ ]:
## <font color='darkred'> **12) K Means Clustering:-** <a name="p12"></a> </font>
PCA isn't exactly a full machine learning algorithm, but instead an unsupervised learning algorithm. As we've noticed before it is difficult to visualize high dimensional data, we can use PCA to find the first two principal components, and visualize the data in this new, two-dimensional space, with a single scatter-plot. Before we do this though, we'll need to scale our data so that each feature has a single unit variance.
In [66]:
# Importing the required libraries:
from sklearn.model_selection import train_test_split    # For Train-Test split
from sklearn.preprocessing import StandardScaler        # For scaling the data

from sklearn.decomposition import PCA                   # For PCA

from sklearn.metrics import classification_report       # For Classification Report
from sklearn.metrics import plot_confusion_matrix       # For Plotting Confusion Matrix
from sklearn.metrics import confusion_matrix            # For Confusion Matrix
In [68]:
from sklearn.datasets import load_breast_cancer
In [69]:
cancer = load_breast_cancer()
In [70]:
cancer.keys()
Out[70]:
dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])
In [71]:
print(cancer['DESCR'])
.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        worst/largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 0 is Mean Radius, field
        10 is Radius SE, field 20 is Worst Radius.

        - class:
                - WDBC-Malignant
                - WDBC-Benign

    :Summary Statistics:

    ===================================== ====== ======
                                           Min    Max
    ===================================== ====== ======
    radius (mean):                        6.981  28.11
    texture (mean):                       9.71   39.28
    perimeter (mean):                     43.79  188.5
    area (mean):                          143.5  2501.0
    smoothness (mean):                    0.053  0.163
    compactness (mean):                   0.019  0.345
    concavity (mean):                     0.0    0.427
    concave points (mean):                0.0    0.201
    symmetry (mean):                      0.106  0.304
    fractal dimension (mean):             0.05   0.097
    radius (standard error):              0.112  2.873
    texture (standard error):             0.36   4.885
    perimeter (standard error):           0.757  21.98
    area (standard error):                6.802  542.2
    smoothness (standard error):          0.002  0.031
    compactness (standard error):         0.002  0.135
    concavity (standard error):           0.0    0.396
    concave points (standard error):      0.0    0.053
    symmetry (standard error):            0.008  0.079
    fractal dimension (standard error):   0.001  0.03
    radius (worst):                       7.93   36.04
    texture (worst):                      12.02  49.54
    perimeter (worst):                    50.41  251.2
    area (worst):                         185.2  4254.0
    smoothness (worst):                   0.071  0.223
    compactness (worst):                  0.027  1.058
    concavity (worst):                    0.0    1.252
    concave points (worst):               0.0    0.291
    symmetry (worst):                     0.156  0.664
    fractal dimension (worst):            0.055  0.208
    ===================================== ====== ======

    :Missing Attribute Values: None

    :Class Distribution: 212 - Malignant, 357 - Benign

    :Creator:  Dr. William H. Wolberg, W. Nick Street, Olvi L. Mangasarian

    :Donor: Nick Street

    :Date: November, 1995

This is a copy of UCI ML Breast Cancer Wisconsin (Diagnostic) datasets.
https://goo.gl/U2Uwz2

Features are computed from a digitized image of a fine needle
aspirate (FNA) of a breast mass.  They describe
characteristics of the cell nuclei present in the image.

Separating plane described above was obtained using
Multisurface Method-Tree (MSM-T) [K. P. Bennett, "Decision Tree
Construction Via Linear Programming." Proceedings of the 4th
Midwest Artificial Intelligence and Cognitive Science Society,
pp. 97-101, 1992], a classification method which uses linear
programming to construct a decision tree.  Relevant features
were selected using an exhaustive search in the space of 1-4
features and 1-3 separating planes.

The actual linear program used to obtain the separating plane
in the 3-dimensional space is that described in:
[K. P. Bennett and O. L. Mangasarian: "Robust Linear
Programming Discrimination of Two Linearly Inseparable Sets",
Optimization Methods and Software 1, 1992, 23-34].

This database is also available through the UW CS ftp server:

ftp ftp.cs.wisc.edu
cd math-prog/cpo-dataset/machine-learn/WDBC/

.. topic:: References

   - W.N. Street, W.H. Wolberg and O.L. Mangasarian. Nuclear feature extraction 
     for breast tumor diagnosis. IS&T/SPIE 1993 International Symposium on 
     Electronic Imaging: Science and Technology, volume 1905, pages 861-870,
     San Jose, CA, 1993.
   - O.L. Mangasarian, W.N. Street and W.H. Wolberg. Breast cancer diagnosis and 
     prognosis via linear programming. Operations Research, 43(4), pages 570-577, 
     July-August 1995.
   - W.H. Wolberg, W.N. Street, and O.L. Mangasarian. Machine learning techniques
     to diagnose breast cancer from fine-needle aspirates. Cancer Letters 77 (1994) 
     163-171.
In [72]:
df = pd.DataFrame(cancer['data'],columns=cancer['feature_names'])
#(['DESCR', 'data', 'feature_names', 'target_names', 'target'])
In [73]:
df.head()
Out[73]:
mean radius mean texture mean perimeter mean area mean smoothness mean compactness mean concavity mean concave points mean symmetry mean fractal dimension ... worst radius worst texture worst perimeter worst area worst smoothness worst compactness worst concavity worst concave points worst symmetry worst fractal dimension
0 17.99 10.38 122.80 1001.0 0.11840 0.27760 0.3001 0.14710 0.2419 0.07871 ... 25.38 17.33 184.60 2019.0 0.1622 0.6656 0.7119 0.2654 0.4601 0.11890
1 20.57 17.77 132.90 1326.0 0.08474 0.07864 0.0869 0.07017 0.1812 0.05667 ... 24.99 23.41 158.80 1956.0 0.1238 0.1866 0.2416 0.1860 0.2750 0.08902
2 19.69 21.25 130.00 1203.0 0.10960 0.15990 0.1974 0.12790 0.2069 0.05999 ... 23.57 25.53 152.50 1709.0 0.1444 0.4245 0.4504 0.2430 0.3613 0.08758
3 11.42 20.38 77.58 386.1 0.14250 0.28390 0.2414 0.10520 0.2597 0.09744 ... 14.91 26.50 98.87 567.7 0.2098 0.8663 0.6869 0.2575 0.6638 0.17300
4 20.29 14.34 135.10 1297.0 0.10030 0.13280 0.1980 0.10430 0.1809 0.05883 ... 22.54 16.67 152.20 1575.0 0.1374 0.2050 0.4000 0.1625 0.2364 0.07678

5 rows × 30 columns

In [74]:
# Scaling the data
scaler = StandardScaler()
scaler.fit(df)
Out[74]:
StandardScaler()
In [75]:
scaled_data = scaler.transform(df)
PCA with Scikit Learn uses a very similar process to other preprocessing functions that come with SciKit Learn. We instantiate a PCA object, find the principal components using the fit method, then apply the rotation and dimensionality reduction by calling transform(). We can also specify how many components we want to keep when creating the PCA object.

13.1) PCA Model:

In [76]:
pca = PCA(n_components=2)
In [77]:
pca.fit(scaled_data)
Out[77]:
PCA(n_components=2)

Now we can transform this data to its first 2 principal components.

In [78]:
x_pca = pca.transform(scaled_data)
In [79]:
scaled_data.shape
Out[79]:
(569, 30)
In [80]:
x_pca.shape
Out[80]:
(569, 2)

Great! We've reduced 30 dimensions to just 2! Let's plot these two dimensions out!

In [81]:
plt.figure(figsize=(8,6))
plt.scatter(x_pca[:,0],x_pca[:,1],c=cancer['target'],cmap='plasma');
plt.xlabel('First principal component');
plt.ylabel('Second Principal Component');

13.2) Interpreting the components

Clearly by using these two components we can easily separate these two classes.

Unfortunately, with this great power of dimensionality reduction, comes the cost of being able to easily understand what these components represent.

The components correspond to combinations of the original features, the components themselves are stored as an attribute of the fitted PCA object:

In [82]:
pca.components_
Out[82]:
array([[ 0.21890244,  0.10372458,  0.22753729,  0.22099499,  0.14258969,
         0.23928535,  0.25840048,  0.26085376,  0.13816696,  0.06436335,
         0.20597878,  0.01742803,  0.21132592,  0.20286964,  0.01453145,
         0.17039345,  0.15358979,  0.1834174 ,  0.04249842,  0.10256832,
         0.22799663,  0.10446933,  0.23663968,  0.22487053,  0.12795256,
         0.21009588,  0.22876753,  0.25088597,  0.12290456,  0.13178394],
       [-0.23385713, -0.05970609, -0.21518136, -0.23107671,  0.18611302,
         0.15189161,  0.06016536, -0.0347675 ,  0.19034877,  0.36657547,
        -0.10555215,  0.08997968, -0.08945723, -0.15229263,  0.20443045,
         0.2327159 ,  0.19720728,  0.13032156,  0.183848  ,  0.28009203,
        -0.21986638, -0.0454673 , -0.19987843, -0.21935186,  0.17230435,
         0.14359317,  0.09796411, -0.00825724,  0.14188335,  0.27533947]])
In this numpy matrix array, each row represents a principal component, and each column relates back to the original features. we can visualize this relationship with a heatmap. This heatmap and the color bar basically represent the correlation between the various feature and the principal component itself.:
In [83]:
df_comp = pd.DataFrame(pca.components_,columns=cancer['feature_names'])
In [84]:
plt.figure(figsize=(12,6))
sns.heatmap(df_comp,cmap='plasma',);

**14) Recommender Systems:-**

In [39]:
# Importing the required libraries:
from sklearn.model_selection import train_test_split    # For Train-Test split

from sklearn.cluster import KMeans                      # For KMeans Clustering

from sklearn.metrics import classification_report       # For Classification Report
from sklearn.metrics import plot_confusion_matrix       # For Plotting Confusion Matrix
from sklearn.metrics import confusion_matrix            # For Confusion Matrix
In [86]:
# Importing the dataset
column_names = ['user_id', 'item_id', 'rating', 'timestamp']
df = pd.read_csv('u-Copy1.data', sep='\t', names=column_names)
In [91]:
# Getting the Movie Titles as well:
movie_titles = pd.read_csv("Movie_Id_Titles-Copy1")
movie_titles.head()
Out[91]:
item_id title
0 1 Toy Story (1995)
1 2 GoldenEye (1995)
2 3 Four Rooms (1995)
3 4 Get Shorty (1995)
4 5 Copycat (1995)
In [ ]:
# Merging the DataFrames:
df = pd.merge(df,movie_titles,on='item_id')
In [93]:
df.head()
Out[93]:
user_id item_id rating timestamp title
0 0 50 5 881250949 Star Wars (1977)
1 290 50 5 880473582 Star Wars (1977)
2 79 50 4 891271545 Star Wars (1977)
3 2 50 5 888552084 Star Wars (1977)
4 8 50 5 879362124 Star Wars (1977)
In [94]:
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 100003 entries, 0 to 100002
Data columns (total 5 columns):
 #   Column     Non-Null Count   Dtype 
---  ------     --------------   ----- 
 0   user_id    100003 non-null  int64 
 1   item_id    100003 non-null  int64 
 2   rating     100003 non-null  int64 
 3   timestamp  100003 non-null  int64 
 4   title      100003 non-null  object
dtypes: int64(4), object(1)
memory usage: 4.6+ MB
In [95]:
df.nunique()
Out[95]:
user_id        944
item_id       1682
rating           5
timestamp    49282
title         1664
dtype: int64

14.1) EDA

In [105]:
# Average Rating for each movie
meanr = pd.DataFrame(df.groupby('title').mean()['rating'])
meanr.sort_values(by='rating', ascending=False)
Out[105]:
rating
title
They Made Me a Criminal (1939) 5.0
Marlene Dietrich: Shadow and Light (1996) 5.0
Saint of Fort Washington, The (1993) 5.0
Someone Else's America (1995) 5.0
Star Kid (1997) 5.0
... ...
Eye of Vichy, The (Oeil de Vichy, L') (1993) 1.0
King of New York (1990) 1.0
Touki Bouki (Journey of the Hyena) (1973) 1.0
Bloody Child, The (1996) 1.0
Crude Oasis, The (1995) 1.0

1664 rows × 1 columns

In [107]:
# Number of Ratings for each movie
countr = pd.DataFrame(df.groupby('title').count()['rating'])
countr.sort_values(by='rating', ascending=False)
Out[107]:
rating
title
Star Wars (1977) 584
Contact (1997) 509
Fargo (1996) 508
Return of the Jedi (1983) 507
Liar Liar (1997) 485
... ...
Great Day in Harlem, A (1994) 1
Other Voices, Other Rooms (1997) 1
Good Morning (1971) 1
Girls Town (1996) 1
Á köldum klaka (Cold Fever) (1994) 1

1664 rows × 1 columns

In [110]:
dfr = pd.merge(meanr, countr, on='title')
dfr.rename(columns = {'rating_x': 'avg rating', 'rating_y': 'no of ratings'}, inplace = True)
dfr.head()
Out[110]:
avg rating no of ratings
title
'Til There Was You (1997) 2.333333 9
1-900 (1994) 2.600000 5
101 Dalmatians (1996) 2.908257 109
12 Angry Men (1957) 4.344000 125
187 (1997) 3.024390 41
In [115]:
sns.distplot(dfr['avg rating'], bins=50);
In [118]:
sns.jointplot(dfr['avg rating'], dfr['no of ratings']);

14.2) Recommender System Model:

In [119]:
moviemat = df.pivot_table(index='user_id',columns='title',values='rating')
moviemat.head()
Out[119]:
title 'Til There Was You (1997) 1-900 (1994) 101 Dalmatians (1996) 12 Angry Men (1957) 187 (1997) 2 Days in the Valley (1996) 20,000 Leagues Under the Sea (1954) 2001: A Space Odyssey (1968) 3 Ninjas: High Noon At Mega Mountain (1998) 39 Steps, The (1935) ... Yankee Zulu (1994) Year of the Horse (1997) You So Crazy (1994) Young Frankenstein (1974) Young Guns (1988) Young Guns II (1990) Young Poisoner's Handbook, The (1995) Zeus and Roxanne (1997) unknown Á köldum klaka (Cold Fever) (1994)
user_id
0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 NaN NaN 2.0 5.0 NaN NaN 3.0 4.0 NaN NaN ... NaN NaN NaN 5.0 3.0 NaN NaN NaN 4.0 NaN
2 NaN NaN NaN NaN NaN NaN NaN NaN 1.0 NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 NaN NaN NaN NaN 2.0 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 1664 columns

In [120]:
starwars_user_ratings = moviemat['Star Wars (1977)']
liarliar_user_ratings = moviemat['Liar Liar (1997)']
starwars_user_ratings.head()
Out[120]:
user_id
0    5.0
1    5.0
2    5.0
3    NaN
4    5.0
Name: Star Wars (1977), dtype: float64
In [122]:
similar_to_starwars = moviemat.corrwith(starwars_user_ratings);
similar_to_liarliar = moviemat.corrwith(liarliar_user_ratings);
In [123]:
corr_starwars = pd.DataFrame(similar_to_starwars,columns=['Correlation'])
corr_starwars.dropna(inplace=True)
corr_starwars.head()
Out[123]:
Correlation
title
'Til There Was You (1997) 0.872872
1-900 (1994) -0.645497
101 Dalmatians (1996) 0.211132
12 Angry Men (1957) 0.184289
187 (1997) 0.027398
In [124]:
corr_starwars.sort_values('Correlation',ascending=False).head(10)
Out[124]:
Correlation
title
Commandments (1997) 1.0
Cosi (1996) 1.0
No Escape (1994) 1.0
Stripes (1981) 1.0
Man of the Year (1995) 1.0
Hollow Reed (1996) 1.0
Beans of Egypt, Maine, The (1994) 1.0
Good Man in Africa, A (1994) 1.0
Old Lady Who Walked in the Sea, The (Vieille qui marchait dans la mer, La) (1991) 1.0
Outlaw, The (1943) 1.0
In [126]:
corr_starwars = corr_starwars.join(dfr['no of ratings'])
corr_starwars.head()
Out[126]:
Correlation no of ratings
title
'Til There Was You (1997) 0.872872 9
1-900 (1994) -0.645497 5
101 Dalmatians (1996) 0.211132 109
12 Angry Men (1957) 0.184289 125
187 (1997) 0.027398 41
In [128]:
corr_starwars[corr_starwars['no of ratings']>100].sort_values('Correlation',ascending=False).head()
Out[128]:
Correlation no of ratings
title
Star Wars (1977) 1.000000 584
Empire Strikes Back, The (1980) 0.748353 368
Return of the Jedi (1983) 0.672556 507
Raiders of the Lost Ark (1981) 0.536117 420
Austin Powers: International Man of Mystery (1997) 0.377433 130
In [131]:
corr_liarliar = pd.DataFrame(similar_to_liarliar,columns=['Correlation'])
corr_liarliar.dropna(inplace=True)
corr_liarliar = corr_liarliar.join(dfr['no of ratings'])
corr_liarliar[corr_liarliar['no of ratings']>100].sort_values('Correlation',ascending=False).head()
Out[131]:
Correlation no of ratings
title
Liar Liar (1997) 1.000000 485
Batman Forever (1995) 0.516968 114
Mask, The (1994) 0.484650 129
Down Periscope (1996) 0.472681 101
Con Air (1997) 0.469828 137

**15) Statistical Testing:-**

We use statistics in a lot of different ways in data science, and on this lecture, I want to refresh your knowledge of hypothesis testing, which is a core data analysis activity behind experimentation. The goal of hypothesis testing is to determine if, for instance, the two different conditions we have in an experiment have resulted in different impacts
In [15]:
# Importing new libraries from scipy
from scipy import stats
In [16]:
df = pd.read_csv('grades.csv')
In [18]:
df.head()
Out[18]:
student_id assignment1_grade assignment1_submission assignment2_grade assignment2_submission assignment3_grade assignment3_submission assignment4_grade assignment4_submission assignment5_grade assignment5_submission assignment6_grade assignment6_submission
0 B73F2C11-70F0-E37D-8B10-1D20AFED50B1 92.733946 2015-11-02 06:55:34.282000000 83.030552 2015-11-09 02:22:58.938000000 67.164441 2015-11-12 08:58:33.998000000 53.011553 2015-11-16 01:21:24.663000000 47.710398 2015-11-20 13:24:59.692000000 38.168318 2015-11-22 18:31:15.934000000
1 98A0FAE0-A19A-13D2-4BB5-CFBFD94031D1 86.790821 2015-11-29 14:57:44.429000000 86.290821 2015-12-06 17:41:18.449000000 69.772657 2015-12-10 08:54:55.904000000 55.098125 2015-12-13 17:32:30.941000000 49.588313 2015-12-19 23:26:39.285000000 44.629482 2015-12-21 17:07:24.275000000
2 D0F62040-CEB0-904C-F563-2F8620916C4E 85.512541 2016-01-09 05:36:02.389000000 85.512541 2016-01-09 06:39:44.416000000 68.410033 2016-01-15 20:22:45.882000000 54.728026 2016-01-11 12:41:50.749000000 49.255224 2016-01-11 17:31:12.489000000 44.329701 2016-01-17 16:24:42.765000000
3 FFDF2B2C-F514-EF7F-6538-A6A53518E9DC 86.030665 2016-04-30 06:50:39.801000000 68.824532 2016-04-30 17:20:38.727000000 61.942079 2016-05-12 07:47:16.326000000 49.553663 2016-05-07 16:09:20.485000000 49.553663 2016-05-24 12:51:18.016000000 44.598297 2016-05-26 08:09:12.058000000
4 5ECBEEB6-F1CE-80AE-3164-E45E99473FB4 64.813800 2015-12-13 17:06:10.750000000 51.491040 2015-12-14 12:25:12.056000000 41.932832 2015-12-29 14:25:22.594000000 36.929549 2015-12-28 01:29:55.901000000 33.236594 2015-12-29 14:46:06.628000000 33.236594 2016-01-05 01:06:59.546000000
In [19]:
df.describe().T
Out[19]:
count mean std min 25% 50% 75% max
assignment1_grade 2315.0 74.535732 16.353252 14.423297 63.670100 77.208365 87.502146 100.695583
assignment2_grade 2315.0 66.849007 15.959210 12.980967 56.127794 68.142124 78.310880 99.936206
assignment3_grade 2315.0 60.623197 15.492469 12.307682 49.866390 61.307206 71.292632 99.655813
assignment4_grade 2315.0 54.112112 14.687431 9.126146 43.852636 54.442888 63.789234 98.755813
assignment5_grade 2315.0 48.618522 13.927054 8.213531 38.859619 48.681165 57.662236 97.571739
assignment6_grade 2315.0 43.841452 13.259413 7.392178 34.828619 43.172442 52.086086 97.571739
In [20]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2315 entries, 0 to 2314
Data columns (total 13 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   student_id              2315 non-null   object 
 1   assignment1_grade       2315 non-null   float64
 2   assignment1_submission  2315 non-null   object 
 3   assignment2_grade       2315 non-null   float64
 4   assignment2_submission  2315 non-null   object 
 5   assignment3_grade       2315 non-null   float64
 6   assignment3_submission  2315 non-null   object 
 7   assignment4_grade       2315 non-null   float64
 8   assignment4_submission  2315 non-null   object 
 9   assignment5_grade       2315 non-null   float64
 10  assignment5_submission  2315 non-null   object 
 11  assignment6_grade       2315 non-null   float64
 12  assignment6_submission  2315 non-null   object 
dtypes: float64(6), object(7)
memory usage: 235.2+ KB
In [26]:
early_finishers = df[df['assignment1_submission'].apply(pd.to_datetime) < '2016']
early_finishers.head()
Out[26]:
student_id assignment1_grade assignment1_submission assignment2_grade assignment2_submission assignment3_grade assignment3_submission assignment4_grade assignment4_submission assignment5_grade assignment5_submission assignment6_grade assignment6_submission
0 B73F2C11-70F0-E37D-8B10-1D20AFED50B1 92.733946 2015-11-02 06:55:34.282000000 83.030552 2015-11-09 02:22:58.938000000 67.164441 2015-11-12 08:58:33.998000000 53.011553 2015-11-16 01:21:24.663000000 47.710398 2015-11-20 13:24:59.692000000 38.168318 2015-11-22 18:31:15.934000000
1 98A0FAE0-A19A-13D2-4BB5-CFBFD94031D1 86.790821 2015-11-29 14:57:44.429000000 86.290821 2015-12-06 17:41:18.449000000 69.772657 2015-12-10 08:54:55.904000000 55.098125 2015-12-13 17:32:30.941000000 49.588313 2015-12-19 23:26:39.285000000 44.629482 2015-12-21 17:07:24.275000000
4 5ECBEEB6-F1CE-80AE-3164-E45E99473FB4 64.813800 2015-12-13 17:06:10.750000000 51.491040 2015-12-14 12:25:12.056000000 41.932832 2015-12-29 14:25:22.594000000 36.929549 2015-12-28 01:29:55.901000000 33.236594 2015-12-29 14:46:06.628000000 33.236594 2016-01-05 01:06:59.546000000
5 D09000A0-827B-C0FF-3433-BF8FF286E15B 71.647278 2015-12-28 04:35:32.836000000 64.052550 2016-01-03 21:05:38.392000000 64.752550 2016-01-07 08:55:43.692000000 57.467295 2016-01-11 00:45:28.706000000 57.467295 2016-01-11 00:54:13.579000000 57.467295 2016-01-20 19:54:46.166000000
8 C9D51293-BD58-F113-4167-A7C0BAFCB6E5 66.595568 2015-12-25 02:29:28.415000000 52.916454 2015-12-31 01:42:30.046000000 48.344809 2016-01-05 23:34:02.180000000 47.444809 2016-01-02 07:48:42.517000000 37.955847 2016-01-03 21:27:04.266000000 37.955847 2016-01-19 15:24:31.060000000
In [24]:
late_finsihers = df[df['assignment1_submission'].apply(pd.to_datetime) > '2016']
late_finsihers.head()
Out[24]:
student_id assignment1_grade assignment1_submission assignment2_grade assignment2_submission assignment3_grade assignment3_submission assignment4_grade assignment4_submission assignment5_grade assignment5_submission assignment6_grade assignment6_submission
2 D0F62040-CEB0-904C-F563-2F8620916C4E 85.512541 2016-01-09 05:36:02.389000000 85.512541 2016-01-09 06:39:44.416000000 68.410033 2016-01-15 20:22:45.882000000 54.728026 2016-01-11 12:41:50.749000000 49.255224 2016-01-11 17:31:12.489000000 44.329701 2016-01-17 16:24:42.765000000
3 FFDF2B2C-F514-EF7F-6538-A6A53518E9DC 86.030665 2016-04-30 06:50:39.801000000 68.824532 2016-04-30 17:20:38.727000000 61.942079 2016-05-12 07:47:16.326000000 49.553663 2016-05-07 16:09:20.485000000 49.553663 2016-05-24 12:51:18.016000000 44.598297 2016-05-26 08:09:12.058000000
6 3217BE3F-E4B0-C3B6-9F64-462456819CE4 87.498744 2016-03-05 11:05:25.408000000 69.998995 2016-03-09 07:29:52.405000000 55.999196 2016-03-16 22:31:24.316000000 50.399276 2016-03-18 07:19:26.032000000 45.359349 2016-03-19 10:35:41.869000000 45.359349 2016-03-23 14:02:00.987000000
7 F1CB5AA1-B3DE-5460-FAFF-BE951FD38B5F 80.576090 2016-01-24 18:24:25.619000000 72.518481 2016-01-27 13:37:12.943000000 65.266633 2016-01-30 14:34:36.581000000 65.266633 2016-02-03 22:08:49.002000000 65.266633 2016-02-16 14:22:23.664000000 65.266633 2016-02-18 08:35:04.796000000
9 E2C617C2-4654-622C-AB50-1550C4BE42A0 59.270882 2016-03-06 12:06:26.185000000 59.270882 2016-03-13 02:07:25.289000000 53.343794 2016-03-17 07:30:09.241000000 53.343794 2016-03-20 21:45:56.229000000 42.675035 2016-03-27 15:55:04.414000000 38.407532 2016-03-30 20:33:13.554000000
In [27]:
# Another solution. First, the dataframe df and the early_finishers share index values, so I really just
# want everything in the df which is not in early_finishers
late_finishers=df[~df.index.isin(early_finishers.index)]
late_finishers.head()
Out[27]:
student_id assignment1_grade assignment1_submission assignment2_grade assignment2_submission assignment3_grade assignment3_submission assignment4_grade assignment4_submission assignment5_grade assignment5_submission assignment6_grade assignment6_submission
2 D0F62040-CEB0-904C-F563-2F8620916C4E 85.512541 2016-01-09 05:36:02.389000000 85.512541 2016-01-09 06:39:44.416000000 68.410033 2016-01-15 20:22:45.882000000 54.728026 2016-01-11 12:41:50.749000000 49.255224 2016-01-11 17:31:12.489000000 44.329701 2016-01-17 16:24:42.765000000
3 FFDF2B2C-F514-EF7F-6538-A6A53518E9DC 86.030665 2016-04-30 06:50:39.801000000 68.824532 2016-04-30 17:20:38.727000000 61.942079 2016-05-12 07:47:16.326000000 49.553663 2016-05-07 16:09:20.485000000 49.553663 2016-05-24 12:51:18.016000000 44.598297 2016-05-26 08:09:12.058000000
6 3217BE3F-E4B0-C3B6-9F64-462456819CE4 87.498744 2016-03-05 11:05:25.408000000 69.998995 2016-03-09 07:29:52.405000000 55.999196 2016-03-16 22:31:24.316000000 50.399276 2016-03-18 07:19:26.032000000 45.359349 2016-03-19 10:35:41.869000000 45.359349 2016-03-23 14:02:00.987000000
7 F1CB5AA1-B3DE-5460-FAFF-BE951FD38B5F 80.576090 2016-01-24 18:24:25.619000000 72.518481 2016-01-27 13:37:12.943000000 65.266633 2016-01-30 14:34:36.581000000 65.266633 2016-02-03 22:08:49.002000000 65.266633 2016-02-16 14:22:23.664000000 65.266633 2016-02-18 08:35:04.796000000
9 E2C617C2-4654-622C-AB50-1550C4BE42A0 59.270882 2016-03-06 12:06:26.185000000 59.270882 2016-03-13 02:07:25.289000000 53.343794 2016-03-17 07:30:09.241000000 53.343794 2016-03-20 21:45:56.229000000 42.675035 2016-03-27 15:55:04.414000000 38.407532 2016-03-30 20:33:13.554000000
In [28]:
early_finishers.describe().T
Out[28]:
count mean std min 25% 50% 75% max
assignment1_grade 1259.0 74.947285 16.013220 14.959603 64.551691 77.480784 87.587063 100.695583
assignment2_grade 1259.0 67.229129 15.629192 14.459603 56.656246 68.500902 78.110489 99.691182
assignment3_grade 1259.0 61.098805 15.104321 12.307682 50.567862 61.874388 71.213942 99.655813
assignment4_grade 1259.0 54.126001 14.417234 9.126146 43.821190 54.268828 63.713094 98.755813
assignment5_grade 1259.0 48.604524 13.733379 8.213531 38.953136 48.632200 57.505574 96.577300
assignment6_grade 1259.0 43.812144 13.040617 7.392178 34.910921 43.187982 52.161643 92.445001
In [29]:
late_finishers.describe().T
Out[29]:
count mean std min 25% 50% 75% max
assignment1_grade 1056.0 74.045065 16.744029 14.423297 63.118914 76.656572 87.385041 99.954899
assignment2_grade 1056.0 66.395813 16.339841 12.980967 55.655623 67.701557 78.417260 99.936206
assignment3_grade 1056.0 60.056162 15.931528 12.980967 49.462195 60.406658 71.419522 99.581715
assignment4_grade 1056.0 54.095552 15.010055 10.384774 44.032372 54.594597 64.033838 97.571739
assignment5_grade 1056.0 48.635211 14.161006 10.384774 38.862809 48.756713 57.921125 97.571739
assignment6_grade 1056.0 43.876394 13.521760 9.830180 34.706034 43.100879 52.021807 97.571739
Now, as we can observe, Early as well as Late finishers have obtained almost similar average scores in all the assignments. But are they the same? This is where the Statistical tests come in.The SciPy library contains a number of different statistical tests and forms a basis for hypothesis testing in Python and we're going to use the ttest_ind() function which does an independent t-test (meaning the populations are not related to one another). The result of ttest_index() are the t-statistic and a p-value. It's this latter value, the probability, which is most important to us, as it indicates the chance (between 0 and 1) of our null hypothesis being True.
In [30]:
# Let's bring in our ttest_ind function
from scipy.stats import ttest_ind
In [31]:
# Let's run this function with our two populations, looking at the assignment 1 grades
ttest_ind(early_finishers['assignment1_grade'], late_finishers['assignment1_grade'])
Out[31]:
Ttest_indResult(statistic=1.322354085372139, pvalue=0.1861810110171455)
So here we see that the probability is 0.18, and this is above our alpha value of 0.05. This means that we cannot reject the null hypothesis. The null hypothesis was that the two populations are the same, and we don't have enough certainty in our evidence (because it is greater than alpha) to come to a conclusion to the contrary. This doesn't mean that we have proven the populations are the same.
In [32]:
# Checking the other assignment grades as well:
print(ttest_ind(early_finishers['assignment2_grade'], late_finishers['assignment2_grade']))
print(ttest_ind(early_finishers['assignment3_grade'], late_finishers['assignment3_grade']))
print(ttest_ind(early_finishers['assignment4_grade'], late_finishers['assignment4_grade']))
print(ttest_ind(early_finishers['assignment5_grade'], late_finishers['assignment5_grade']))
print(ttest_ind(early_finishers['assignment6_grade'], late_finishers['assignment6_grade']))
Ttest_indResult(statistic=1.2514717608216366, pvalue=0.2108889627004424)
Ttest_indResult(statistic=1.6133726558705392, pvalue=0.10679998102227865)
Ttest_indResult(statistic=0.049671157386456125, pvalue=0.960388729789337)
Ttest_indResult(statistic=-0.05279315545404755, pvalue=0.9579012739746492)
Ttest_indResult(statistic=-0.11609743352612056, pvalue=0.9075854011989656)
P-values have come under fire recently for being insuficient for telling us enough about the interactions which are happening, and two other techniques, confidence intervalues and bayesian analyses, are being used more regularly. One issue with p-values is that as you run more tests you are likely to get a value which is statistically significant just by chance.

**16) Naive Bayes:-**

In [17]:
# Importing the required libraries:
from sklearn.model_selection import train_test_split    # For Train-Test split

from sklearn.preprocessing import StandardScaler        # For scaling the dataset
from sklearn.naive_bayes import GaussianNB              # The Gaussian Naive Bayes

from sklearn.metrics import classification_report       # For Classification Report
from sklearn.metrics import plot_confusion_matrix       # For Plotting Confusion Matrix
from sklearn.metrics import confusion_matrix            # For Confusion Matrix
In [10]:
df = pd.read_csv('diabetes.csv')
In [11]:
df.head()
Out[11]:
Pregnancies Glucose BloodPressure SkinThickness Insulin BMI DiabetesPedigreeFunction Age Outcome
0 6 148 72 35 0 33.6 0.627 50 1
1 1 85 66 29 0 26.6 0.351 31 0
2 8 183 64 0 0 23.3 0.672 32 1
3 1 89 66 23 94 28.1 0.167 21 0
4 0 137 40 35 168 43.1 2.288 33 1
In [12]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               768 non-null    int64  
 1   Glucose                   768 non-null    int64  
 2   BloodPressure             768 non-null    int64  
 3   SkinThickness             768 non-null    int64  
 4   Insulin                   768 non-null    int64  
 5   BMI                       768 non-null    float64
 6   DiabetesPedigreeFunction  768 non-null    float64
 7   Age                       768 non-null    int64  
 8   Outcome                   768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB
In [13]:
df.describe().T
Out[13]:
count mean std min 25% 50% 75% max
Pregnancies 768.0 3.845052 3.369578 0.000 1.00000 3.0000 6.00000 17.00
Glucose 768.0 120.894531 31.972618 0.000 99.00000 117.0000 140.25000 199.00
BloodPressure 768.0 69.105469 19.355807 0.000 62.00000 72.0000 80.00000 122.00
SkinThickness 768.0 20.536458 15.952218 0.000 0.00000 23.0000 32.00000 99.00
Insulin 768.0 79.799479 115.244002 0.000 0.00000 30.5000 127.25000 846.00
BMI 768.0 31.992578 7.884160 0.000 27.30000 32.0000 36.60000 67.10
DiabetesPedigreeFunction 768.0 0.471876 0.331329 0.078 0.24375 0.3725 0.62625 2.42
Age 768.0 33.240885 11.760232 21.000 24.00000 29.0000 41.00000 81.00
Outcome 768.0 0.348958 0.476951 0.000 0.00000 0.0000 1.00000 1.00
In [17]:
sns.pairplot(df);
In [15]:
X_train, X_test, y_train, y_test = train_test_split(df.drop('Outcome', axis=1), df['Outcome'], test_size=0.3, random_state=42)
In [18]:
nb = GaussianNB()
In [19]:
nb.fit(X_train, y_train)
Out[19]:
GaussianNB()
In [20]:
predictions = nb.predict(X_test)
In [25]:
nb.score(X_test, y_test)
Out[25]:
0.7445887445887446
In [29]:
print(classification_report(y_test, predictions))
print(confusion_matrix(y_test, predictions))
              precision    recall  f1-score   support

           0       0.82      0.79      0.80       151
           1       0.62      0.66      0.64        80

    accuracy                           0.74       231
   macro avg       0.72      0.73      0.72       231
weighted avg       0.75      0.74      0.75       231

[[119  32]
 [ 27  53]]

**17) Time Series:-**

In [106]:
# Importing the required libraries:
from datetime import datetime
from datetime import timedelta

from dateutil.parser import parse

from pandas.tseries.offsets import Hour, Minute

PREVIOUS_MAX_ROWS = pd.options.display.max_rows
pd.options.display.max_rows = 20
np.set_printoptions(precision=4, suppress=True)

17.1) Date and Time Data Types and Tools

In [40]:
today = datetime.now()
today
Out[40]:
datetime.datetime(2021, 3, 31, 18, 25, 46, 789983)
In [47]:
print(today.date())

print(today.day)
print(today.month)
print(today.year)

print(today.hour)
print(today.minute)
print(today.second)

print(today.astimezone())
print(today.timestamp())
2021-03-31
31
3
2021
18
25
46
2021-03-31 18:25:46.789983+05:30
1617195346.789983
In [56]:
delta = datetime(2011, 1, 7) - datetime(2008, 6, 24, 8, 15)
print(delta)
print(delta.days)
print(delta.seconds)
926 days, 15:45:00
926
56700
In [59]:
# Using timedelta
start = datetime(2011, 1, 7)
start + timedelta(12)
start - 2*timedelta(12)
Out[59]:
datetime.datetime(2010, 12, 14, 0, 0)

17.2) Converting Between String and Datetime

In [63]:
stamp = datetime(year=2020, month=6, day=28)
str(stamp)
Out[63]:
'2020-06-28 00:00:00'
In [70]:
# Conversion from timestamp to string
s1 = stamp.strftime('%y-%m-%d')
s2 = stamp.strftime('%Y-%m-%d')


print(s1)
print(s2)
20-06-28
2020-06-28
In [73]:
# Conversion from string to timestamp
value = '2020-06-28'
d1 = datetime.strptime(value, '%Y-%m-%d')
d1
Out[73]:
datetime.datetime(2020, 6, 28, 0, 0)
'dateutil' is capable of parsing most human-intelligible date representations
In [76]:
d2 = parse('Jan 31, 1997 10:45 PM')
d2
Out[76]:
datetime.datetime(1997, 1, 31, 22, 45)
In [77]:
d3 = parse('6/12/2011', dayfirst=True)
d3
Out[77]:
datetime.datetime(2011, 12, 6, 0, 0)
pandas is generally oriented toward working with arrays of dates, whether used as an axis index or a column in a DataFrame. The to_datetime method parses many different kinds of date representations. NaT (Not a Time) is pandas’s null value for timestamp data.
In [78]:
datestrs = ['2011-07-06 12:00:00', '2011-08-06 00:00:00']
pd.to_datetime(datestrs)
Out[78]:
DatetimeIndex(['2011-07-06 12:00:00', '2011-08-06 00:00:00'], dtype='datetime64[ns]', freq=None)

17.3) Time Series Basics

A basic kind of time series object in pandas is a Series indexed by timestamps, which is often represented external to pandas as Python strings or datetime objects:
In [85]:
dates = [datetime(2011, 1, 2), datetime(2011, 1, 5),
         datetime(2011, 1, 7), datetime(2011, 1, 8),
         datetime(2011, 1, 10), datetime(2011, 1, 12)]
ts = pd.Series(np.random.randn(6), index=dates)
ts.head()
Out[85]:
2011-01-02   -0.848077
2011-01-05    0.605965
2011-01-07   -2.018168
2011-01-08    0.740122
2011-01-10    0.528813
dtype: float64
In [80]:
ts.index
Out[80]:
DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)
Pandas stores timestamps using NumPy’s datetime64 data type at the nanosecond resolution
In [81]:
ts.index.dtype
Out[81]:
dtype('<M8[ns]')
Scalar values from a DatetimeIndex are pandas Timestamp objects
In [82]:
stamp = ts.index[0]
stamp
Out[82]:
Timestamp('2011-01-02 00:00:00')

17.4) Indexing, Selection, Subsetting

In [87]:
# We can use Timestamp indices in many different formats
print(ts['1/10/2011'])
print(ts['20110110'])
print(ts['2011-01-10'])
0.5288134940893595
0.5288134940893595
0.5288134940893595
For longer time series, a year or only a year and month can be passed to easily select slices of data. Let's create a longer series first.
In [88]:
longer_ts = pd.Series(np.random.randn(1000), index=pd.date_range('1/1/2000', periods=1000))
longer_ts
Out[88]:
2000-01-01    0.188695
2000-01-02   -0.758872
2000-01-03   -0.933237
2000-01-04    0.955057
2000-01-05    0.190794
                ...   
2002-09-22    1.049188
2002-09-23   -0.079656
2002-09-24    0.356025
2002-09-25   -1.024983
2002-09-26   -0.226909
Freq: D, Length: 1000, dtype: float64
In [89]:
# Selecting only the indices from May 2001
longer_ts['2001-05']
Out[89]:
2001-05-01    1.892137
2001-05-02   -0.697859
2001-05-03    0.358151
2001-05-04    0.020895
2001-05-05   -0.332138
                ...   
2001-05-27    0.033668
2001-05-28   -0.124821
2001-05-29    1.439175
2001-05-30   -0.349295
2001-05-31   -1.072948
Freq: D, Length: 31, dtype: float64
Because most time series data is ordered chronologically, you can slice with timestamps not contained in a time series to perform a range query
In [90]:
ts['1/6/2011':'1/11/2011'] 
Out[90]:
2011-01-07   -2.018168
2011-01-08    0.740122
2011-01-10    0.528813
dtype: float64

17.5) Time Series with Duplicate Indices

In [91]:
dates = pd.DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000',
                          '1/2/2000', '1/3/2000'])
dup_ts = pd.Series(np.arange(5), index=dates)
dup_ts
Out[91]:
2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int32
In [92]:
# Checking if all the index timestamps are unique
dup_ts.index.is_unique
Out[92]:
False
In [93]:
print(dup_ts['1/3/2000'])  # not duplicated
print()
print(dup_ts['1/2/2000'])  # duplicated
4

2000-01-02    1
2000-01-02    2
2000-01-02    3
dtype: int32
In [94]:
# Using groupby for duplicate indices
grouped = dup_ts.groupby(level=0)
grouped.mean()
grouped.count()
Out[94]:
2000-01-01    1
2000-01-02    3
2000-01-03    1
dtype: int64

17.6) Date Ranges, Frequencies, and Shifting

Generic time series in pandas are assumed to be irregular; that is, they have no fixed frequency. For many applications this is sufficient. However, it’s often desirable to work relative to a fixed frequency, such as daily, monthly, or every 15 minutes, even if that means introducing missing values into a time series. Fortunately pandas has a full suite of standard time series frequencies and tools for resampling, inferring frequencies, and generating fixed-frequency date ranges. For example, you can convert the sample time series to be fixed daily frequency by calling resample:
In [98]:
ts
resampler = ts.resample('D')    # 'D' indicates daily frequency

17.6.1) Generating Date Ranges

pandas.date_range is responsible for generating a DatetimeIndex with an indicated length according to a particular frequency.
In [105]:
index = pd.date_range('2012-04-01', '2012-06-01')
index
Out[105]:
DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20',
               '2012-04-21', '2012-04-22', '2012-04-23', '2012-04-24',
               '2012-04-25', '2012-04-26', '2012-04-27', '2012-04-28',
               '2012-04-29', '2012-04-30', '2012-05-01', '2012-05-02',
               '2012-05-03', '2012-05-04', '2012-05-05', '2012-05-06',
               '2012-05-07', '2012-05-08', '2012-05-09', '2012-05-10',
               '2012-05-11', '2012-05-12', '2012-05-13', '2012-05-14',
               '2012-05-15', '2012-05-16', '2012-05-17', '2012-05-18',
               '2012-05-19', '2012-05-20', '2012-05-21', '2012-05-22',
               '2012-05-23', '2012-05-24', '2012-05-25', '2012-05-26',
               '2012-05-27', '2012-05-28', '2012-05-29', '2012-05-30',
               '2012-05-31', '2012-06-01'],
              dtype='datetime64[ns]', freq='D')
By default, date_range generates daily timestamps. If you pass only a start or end date, you must pass a number of periods to generate:
In [100]:
# From start date, 20 periods
pd.date_range(start='2012-04-01', periods=20)
Out[100]:
DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20'],
              dtype='datetime64[ns]', freq='D')
In [101]:
# Reverse from end date, 20 periods
pd.date_range(end='2012-06-01', periods=20)
Out[101]:
DatetimeIndex(['2012-05-13', '2012-05-14', '2012-05-15', '2012-05-16',
               '2012-05-17', '2012-05-18', '2012-05-19', '2012-05-20',
               '2012-05-21', '2012-05-22', '2012-05-23', '2012-05-24',
               '2012-05-25', '2012-05-26', '2012-05-27', '2012-05-28',
               '2012-05-29', '2012-05-30', '2012-05-31', '2012-06-01'],
              dtype='datetime64[ns]', freq='D')
The start and end dates define strict boundaries for the generated date index. For example, if you wanted a date index containing the last business day of each month, you would pass the 'BM' frequency (business end of month) and only dates falling on or inside the date interval will be included:
In [102]:
pd.date_range('2000-01-01', '2000-12-01', freq='BM')
Out[102]:
DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31', '2000-04-28',
               '2000-05-31', '2000-06-30', '2000-07-31', '2000-08-31',
               '2000-09-29', '2000-10-31', '2000-11-30'],
              dtype='datetime64[ns]', freq='BM')
date_range by default preserves the time (if any) of the start or end timestamp
In [103]:
pd.date_range('2012-05-02 12:56:31', periods=5)
Out[103]:
DatetimeIndex(['2012-05-02 12:56:31', '2012-05-03 12:56:31',
               '2012-05-04 12:56:31', '2012-05-05 12:56:31',
               '2012-05-06 12:56:31'],
              dtype='datetime64[ns]', freq='D')
Sometimes you will have start or end dates with time information but want to generate a set of timestamps normalized to midnight as a convention. To do this, there is a normalize option:
In [104]:
pd.date_range('2012-05-02 12:56:31', periods=5, normalize=True)
Out[104]:
DatetimeIndex(['2012-05-02', '2012-05-03', '2012-05-04', '2012-05-05',
               '2012-05-06'],
              dtype='datetime64[ns]', freq='D')

17.6.2) Frequencies and Date Offsets

In most applications, you would never need to explicitly create one of these objects, instead using a string alias like 'H' or '4H'. Putting an integer before the base frequency creates a multiple:
In [107]:
pd.date_range('2000-01-01', '2000-01-03 23:59', freq='4h')
Out[107]:
DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 04:00:00',
               '2000-01-01 08:00:00', '2000-01-01 12:00:00',
               '2000-01-01 16:00:00', '2000-01-01 20:00:00',
               '2000-01-02 00:00:00', '2000-01-02 04:00:00',
               '2000-01-02 08:00:00', '2000-01-02 12:00:00',
               '2000-01-02 16:00:00', '2000-01-02 20:00:00',
               '2000-01-03 00:00:00', '2000-01-03 04:00:00',
               '2000-01-03 08:00:00', '2000-01-03 12:00:00',
               '2000-01-03 16:00:00', '2000-01-03 20:00:00'],
              dtype='datetime64[ns]', freq='4H')
Similarly, you can pass frequency strings, like '1h30min', that will effectively be parsed to the same expression:
In [108]:
pd.date_range('2000-01-01', periods=10, freq='1h30min')
Out[108]:
DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 01:30:00',
               '2000-01-01 03:00:00', '2000-01-01 04:30:00',
               '2000-01-01 06:00:00', '2000-01-01 07:30:00',
               '2000-01-01 09:00:00', '2000-01-01 10:30:00',
               '2000-01-01 12:00:00', '2000-01-01 13:30:00'],
              dtype='datetime64[ns]', freq='90T')

17.6.3) Shifting (Leading and Lagging) Data

“Shifting” refers to moving data backward and forward through time. Both Series and DataFrame have a shift method for doing naive shifts forward or backward, leaving the index unmodified
In [109]:
ts = pd.Series(np.random.randn(4), index=pd.date_range('1/1/2000', periods=4, freq='M'))
print(ts)

print()
ts.shift(2)
ts.shift(-2)
2000-01-31    1.304680
2000-02-29    1.358845
2000-03-31    0.683689
2000-04-30   -0.539219
Freq: M, dtype: float64

Out[109]:
2000-01-31    0.683689
2000-02-29   -0.539219
2000-03-31         NaN
2000-04-30         NaN
Freq: M, dtype: float64

Time Zone Handling

In [110]:
import pytz
pytz.common_timezones[-5:]
Out[110]:
['US/Eastern', 'US/Hawaii', 'US/Mountain', 'US/Pacific', 'UTC']
In [111]:
tz = pytz.timezone('America/New_York')
tz
Out[111]:
<DstTzInfo 'America/New_York' LMT-1 day, 19:04:00 STD>

Time Zone Localization and Conversion

In [112]:
rng = pd.date_range('3/9/2012 9:30', periods=6, freq='D')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts
Out[112]:
2012-03-09 09:30:00   -0.804644
2012-03-10 09:30:00   -0.824182
2012-03-11 09:30:00   -0.553474
2012-03-12 09:30:00    0.368730
2012-03-13 09:30:00   -0.718007
2012-03-14 09:30:00   -1.028170
Freq: D, dtype: float64
In [113]:
print(ts.index.tz)
None
In [114]:
pd.date_range('3/9/2012 9:30', periods=10, freq='D', tz='UTC')
Out[114]:
DatetimeIndex(['2012-03-09 09:30:00+00:00', '2012-03-10 09:30:00+00:00',
               '2012-03-11 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00',
               '2012-03-15 09:30:00+00:00', '2012-03-16 09:30:00+00:00',
               '2012-03-17 09:30:00+00:00', '2012-03-18 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')
In [115]:
ts
ts_utc = ts.tz_localize('UTC')
ts_utc
ts_utc.index
Out[115]:
DatetimeIndex(['2012-03-09 09:30:00+00:00', '2012-03-10 09:30:00+00:00',
               '2012-03-11 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')
In [116]:
ts_utc.tz_convert('America/New_York')
Out[116]:
2012-03-09 04:30:00-05:00   -0.804644
2012-03-10 04:30:00-05:00   -0.824182
2012-03-11 05:30:00-04:00   -0.553474
2012-03-12 05:30:00-04:00    0.368730
2012-03-13 05:30:00-04:00   -0.718007
2012-03-14 05:30:00-04:00   -1.028170
Freq: D, dtype: float64
In [117]:
ts_eastern = ts.tz_localize('America/New_York')
ts_eastern.tz_convert('UTC')
ts_eastern.tz_convert('Europe/Berlin')
Out[117]:
2012-03-09 15:30:00+01:00   -0.804644
2012-03-10 15:30:00+01:00   -0.824182
2012-03-11 14:30:00+01:00   -0.553474
2012-03-12 14:30:00+01:00    0.368730
2012-03-13 14:30:00+01:00   -0.718007
2012-03-14 14:30:00+01:00   -1.028170
Freq: D, dtype: float64
In [118]:
ts.index.tz_localize('Asia/Shanghai')
Out[118]:
DatetimeIndex(['2012-03-09 09:30:00+08:00', '2012-03-10 09:30:00+08:00',
               '2012-03-11 09:30:00+08:00', '2012-03-12 09:30:00+08:00',
               '2012-03-13 09:30:00+08:00', '2012-03-14 09:30:00+08:00'],
              dtype='datetime64[ns, Asia/Shanghai]', freq='D')

Operations with Time Zone−Aware Timestamp Objects

In [119]:
stamp = pd.Timestamp('2011-03-12 04:00')
stamp_utc = stamp.tz_localize('utc')
stamp_utc.tz_convert('America/New_York')
Out[119]:
Timestamp('2011-03-11 23:00:00-0500', tz='America/New_York')
In [120]:
stamp_moscow = pd.Timestamp('2011-03-12 04:00', tz='Europe/Moscow')
stamp_moscow
Out[120]:
Timestamp('2011-03-12 04:00:00+0300', tz='Europe/Moscow')
In [121]:
stamp_utc.value
stamp_utc.tz_convert('America/New_York').value
Out[121]:
1299902400000000000
In [122]:
from pandas.tseries.offsets import Hour
stamp = pd.Timestamp('2012-03-12 01:30', tz='US/Eastern')
stamp
stamp + Hour()
Out[122]:
Timestamp('2012-03-12 02:30:00-0400', tz='US/Eastern')
In [123]:
stamp = pd.Timestamp('2012-11-04 00:30', tz='US/Eastern')
stamp
stamp + 2 * Hour()
Out[123]:
Timestamp('2012-11-04 01:30:00-0500', tz='US/Eastern')

Operations Between Different Time Zones

In [124]:
rng = pd.date_range('3/7/2012 9:30', periods=10, freq='B')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts
ts1 = ts[:7].tz_localize('Europe/London')
ts2 = ts1[2:].tz_convert('Europe/Moscow')
result = ts1 + ts2
result.index
Out[124]:
DatetimeIndex(['2012-03-07 09:30:00+00:00', '2012-03-08 09:30:00+00:00',
               '2012-03-09 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00',
               '2012-03-15 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='B')

Periods and Period Arithmetic

In [125]:
p = pd.Period(2007, freq='A-DEC')
p
Out[125]:
Period('2007', 'A-DEC')
In [126]:
p + 5
p - 2
Out[126]:
Period('2005', 'A-DEC')
In [127]:
pd.Period('2014', freq='A-DEC') - p
Out[127]:
<7 * YearEnds: month=12>
In [128]:
rng = pd.period_range('2000-01-01', '2000-06-30', freq='M')
rng
Out[128]:
PeriodIndex(['2000-01', '2000-02', '2000-03', '2000-04', '2000-05', '2000-06'], dtype='period[M]', freq='M')
In [129]:
pd.Series(np.random.randn(6), index=rng)
Out[129]:
2000-01   -0.518098
2000-02    0.637825
2000-03    1.278005
2000-04   -2.333105
2000-05    1.366085
2000-06   -0.175901
Freq: M, dtype: float64
In [130]:
values = ['2001Q3', '2002Q2', '2003Q1']
index = pd.PeriodIndex(values, freq='Q-DEC')
index
Out[130]:
PeriodIndex(['2001Q3', '2002Q2', '2003Q1'], dtype='period[Q-DEC]', freq='Q-DEC')

Period Frequency Conversion

In [131]:
p = pd.Period('2007', freq='A-DEC')
p
p.asfreq('M', how='start')
p.asfreq('M', how='end')
Out[131]:
Period('2007-12', 'M')
In [132]:
p = pd.Period('2007', freq='A-JUN')
p
p.asfreq('M', 'start')
p.asfreq('M', 'end')
Out[132]:
Period('2007-06', 'M')
In [133]:
p = pd.Period('Aug-2007', 'M')
p.asfreq('A-JUN')
Out[133]:
Period('2008', 'A-JUN')
In [134]:
rng = pd.period_range('2006', '2009', freq='A-DEC')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts
ts.asfreq('M', how='start')
Out[134]:
2006-01    0.916405
2007-01   -0.573341
2008-01   -0.058994
2009-01   -1.208847
Freq: M, dtype: float64
In [135]:
ts.asfreq('B', how='end')
Out[135]:
2006-12-29    0.916405
2007-12-31   -0.573341
2008-12-31   -0.058994
2009-12-31   -1.208847
Freq: B, dtype: float64

Quarterly Period Frequencies

In [136]:
p = pd.Period('2012Q4', freq='Q-JAN')
p
Out[136]:
Period('2012Q4', 'Q-JAN')
In [137]:
p.asfreq('D', 'start')
p.asfreq('D', 'end')
Out[137]:
Period('2012-01-31', 'D')
In [138]:
p4pm = (p.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60
p4pm
p4pm.to_timestamp()
Out[138]:
Timestamp('2012-01-30 16:00:00')
In [296]:
rng = pd.period_range('2011Q3', '2012Q4', freq='Q-JAN')
ts = pd.Series(np.arange(len(rng)), index=rng)
ts
Out[296]:
2011Q3    0
2011Q4    1
2012Q1    2
2012Q2    3
2012Q3    4
2012Q4    5
Freq: Q-JAN, dtype: int32

Converting Timestamps to Periods (and Back)

In [140]:
rng = pd.date_range('2000-01-01', periods=3, freq='M')
ts = pd.Series(np.random.randn(3), index=rng)
ts
pts = ts.to_period()
pts
Out[140]:
2000-01    0.089342
2000-02    1.548458
2000-03    1.314688
Freq: M, dtype: float64
In [141]:
rng = pd.date_range('1/29/2000', periods=6, freq='D')
ts2 = pd.Series(np.random.randn(6), index=rng)
ts2
ts2.to_period('M')
Out[141]:
2000-01    0.830298
2000-01   -0.807765
2000-01   -0.243329
2000-02   -0.588877
2000-02    0.345017
2000-02    0.984583
Freq: M, dtype: float64
In [142]:
pts = ts2.to_period()
pts
pts.to_timestamp(how='end')
Out[142]:
2000-01-29 23:59:59.999999999    0.830298
2000-01-30 23:59:59.999999999   -0.807765
2000-01-31 23:59:59.999999999   -0.243329
2000-02-01 23:59:59.999999999   -0.588877
2000-02-02 23:59:59.999999999    0.345017
2000-02-03 23:59:59.999999999    0.984583
Freq: D, dtype: float64

Creating a PeriodIndex from Arrays

In [145]:
data = pd.read_csv('macrodata.csv')
data.head(5)
data.year
data.quarter
Out[145]:
0      1.0
1      2.0
2      3.0
3      4.0
4      1.0
      ... 
198    3.0
199    4.0
200    1.0
201    2.0
202    3.0
Name: quarter, Length: 203, dtype: float64
In [146]:
index = pd.PeriodIndex(year=data.year, quarter=data.quarter,
                       freq='Q-DEC')
index
data.index = index
data.infl
Out[146]:
1959Q1    0.00
1959Q2    2.34
1959Q3    2.74
1959Q4    0.27
1960Q1    2.31
          ... 
2008Q3   -3.16
2008Q4   -8.79
2009Q1    0.94
2009Q2    3.37
2009Q3    3.56
Freq: Q-DEC, Name: infl, Length: 203, dtype: float64

Resampling and Frequency Conversion

In [147]:
rng = pd.date_range('2000-01-01', periods=100, freq='D')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts
ts.resample('M').mean()
ts.resample('M', kind='period').mean()
Out[147]:
2000-01   -0.011723
2000-02   -0.234784
2000-03    0.173425
2000-04    0.088157
Freq: M, dtype: float64

Downsampling

In [148]:
rng = pd.date_range('2000-01-01', periods=12, freq='T')
ts = pd.Series(np.arange(12), index=rng)
ts
Out[148]:
2000-01-01 00:00:00     0
2000-01-01 00:01:00     1
2000-01-01 00:02:00     2
2000-01-01 00:03:00     3
2000-01-01 00:04:00     4
2000-01-01 00:05:00     5
2000-01-01 00:06:00     6
2000-01-01 00:07:00     7
2000-01-01 00:08:00     8
2000-01-01 00:09:00     9
2000-01-01 00:10:00    10
2000-01-01 00:11:00    11
Freq: T, dtype: int32
In [149]:
ts.resample('5min', closed='right').sum()
Out[149]:
1999-12-31 23:55:00     0
2000-01-01 00:00:00    15
2000-01-01 00:05:00    40
2000-01-01 00:10:00    11
Freq: 5T, dtype: int32
In [150]:
ts.resample('5min', closed='right').sum()
Out[150]:
1999-12-31 23:55:00     0
2000-01-01 00:00:00    15
2000-01-01 00:05:00    40
2000-01-01 00:10:00    11
Freq: 5T, dtype: int32
In [151]:
ts.resample('5min', closed='right', label='right').sum()
Out[151]:
2000-01-01 00:00:00     0
2000-01-01 00:05:00    15
2000-01-01 00:10:00    40
2000-01-01 00:15:00    11
Freq: 5T, dtype: int32
In [152]:
ts.resample('5min', closed='right',
            label='right', loffset='-1s').sum()
Out[152]:
1999-12-31 23:59:59     0
2000-01-01 00:04:59    15
2000-01-01 00:09:59    40
2000-01-01 00:14:59    11
Freq: 5T, dtype: int32

Open-High-Low-Close (OHLC) resampling

In [153]:
ts.resample('5min').ohlc()
Out[153]:
open high low close
2000-01-01 00:00:00 0 4 0 4
2000-01-01 00:05:00 5 9 5 9
2000-01-01 00:10:00 10 11 10 11

Upsampling and Interpolation

In [154]:
frame = pd.DataFrame(np.random.randn(2, 4),
                     index=pd.date_range('1/1/2000', periods=2,
                                         freq='W-WED'),
                     columns=['Colorado', 'Texas', 'New York', 'Ohio'])
frame
Out[154]:
Colorado Texas New York Ohio
2000-01-05 -1.475456 1.571686 0.193707 -0.535221
2000-01-12 1.417556 0.968227 -1.475125 -2.191394
In [155]:
df_daily = frame.resample('D').asfreq()
df_daily
Out[155]:
Colorado Texas New York Ohio
2000-01-05 -1.475456 1.571686 0.193707 -0.535221
2000-01-06 NaN NaN NaN NaN
2000-01-07 NaN NaN NaN NaN
2000-01-08 NaN NaN NaN NaN
2000-01-09 NaN NaN NaN NaN
2000-01-10 NaN NaN NaN NaN
2000-01-11 NaN NaN NaN NaN
2000-01-12 1.417556 0.968227 -1.475125 -2.191394
In [156]:
frame.resample('D').ffill()
Out[156]:
Colorado Texas New York Ohio
2000-01-05 -1.475456 1.571686 0.193707 -0.535221
2000-01-06 -1.475456 1.571686 0.193707 -0.535221
2000-01-07 -1.475456 1.571686 0.193707 -0.535221
2000-01-08 -1.475456 1.571686 0.193707 -0.535221
2000-01-09 -1.475456 1.571686 0.193707 -0.535221
2000-01-10 -1.475456 1.571686 0.193707 -0.535221
2000-01-11 -1.475456 1.571686 0.193707 -0.535221
2000-01-12 1.417556 0.968227 -1.475125 -2.191394
In [157]:
frame.resample('D').ffill(limit=2)
Out[157]:
Colorado Texas New York Ohio
2000-01-05 -1.475456 1.571686 0.193707 -0.535221
2000-01-06 -1.475456 1.571686 0.193707 -0.535221
2000-01-07 -1.475456 1.571686 0.193707 -0.535221
2000-01-08 NaN NaN NaN NaN
2000-01-09 NaN NaN NaN NaN
2000-01-10 NaN NaN NaN NaN
2000-01-11 NaN NaN NaN NaN
2000-01-12 1.417556 0.968227 -1.475125 -2.191394
In [158]:
frame.resample('W-THU').ffill()
Out[158]:
Colorado Texas New York Ohio
2000-01-06 -1.475456 1.571686 0.193707 -0.535221
2000-01-13 1.417556 0.968227 -1.475125 -2.191394

Resampling with Periods

In [159]:
frame = pd.DataFrame(np.random.randn(24, 4),
                     index=pd.period_range('1-2000', '12-2001',
                                           freq='M'),
                     columns=['Colorado', 'Texas', 'New York', 'Ohio'])
frame[:5]
annual_frame = frame.resample('A-DEC').mean()
annual_frame
Out[159]:
Colorado Texas New York Ohio
2000 -0.167259 0.497627 0.035643 0.133613
2001 0.412094 0.196077 -0.131769 -0.100010
In [160]:
# Q-DEC: Quarterly, year ending in December
annual_frame.resample('Q-DEC').ffill()
annual_frame.resample('Q-DEC', convention='end').ffill()
Out[160]:
Colorado Texas New York Ohio
2000Q4 -0.167259 0.497627 0.035643 0.133613
2001Q1 -0.167259 0.497627 0.035643 0.133613
2001Q2 -0.167259 0.497627 0.035643 0.133613
2001Q3 -0.167259 0.497627 0.035643 0.133613
2001Q4 0.412094 0.196077 -0.131769 -0.100010
In [161]:
annual_frame.resample('Q-MAR').ffill()
Out[161]:
Colorado Texas New York Ohio
2000Q4 -0.167259 0.497627 0.035643 0.133613
2001Q1 -0.167259 0.497627 0.035643 0.133613
2001Q2 -0.167259 0.497627 0.035643 0.133613
2001Q3 -0.167259 0.497627 0.035643 0.133613
2001Q4 0.412094 0.196077 -0.131769 -0.100010
2002Q1 0.412094 0.196077 -0.131769 -0.100010
2002Q2 0.412094 0.196077 -0.131769 -0.100010
2002Q3 0.412094 0.196077 -0.131769 -0.100010

Moving Window Functions

In [162]:
close_px_all = pd.read_csv('stock_px_2.csv',
                           parse_dates=True, index_col=0)
close_px = close_px_all[['AAPL', 'MSFT', 'XOM']]
close_px = close_px.resample('B').ffill()
In [163]:
close_px.AAPL.plot()
close_px.AAPL.rolling(250).mean().plot()
Out[163]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a16855f640>
In [164]:
plt.figure()
Out[164]:
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
In [165]:
appl_std250 = close_px.AAPL.rolling(250, min_periods=10).std()
appl_std250[5:12]
appl_std250.plot()
Out[165]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a166f5c1f0>
In [166]:
expanding_mean = appl_std250.expanding().mean()
In [167]:
plt.figure()
Out[167]:
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
In [168]:
close_px.rolling(60).mean().plot(logy=True)
Out[168]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a166dcaee0>
In [169]:
close_px.rolling('20D').mean()
Out[169]:
AAPL MSFT XOM
2003-01-02 7.400000 21.110000 29.220000
2003-01-03 7.425000 21.125000 29.230000
2003-01-06 7.433333 21.256667 29.473333
2003-01-07 7.432500 21.425000 29.342500
2003-01-08 7.402000 21.402000 29.240000
... ... ... ...
2011-10-10 389.351429 25.602143 72.527857
2011-10-11 388.505000 25.674286 72.835000
2011-10-12 388.531429 25.810000 73.400714
2011-10-13 388.826429 25.961429 73.905000
2011-10-14 391.038000 26.048667 74.185333

2292 rows × 3 columns

Exponentially Weighted Functions

In [170]:
plt.figure()
Out[170]:
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
In [171]:
aapl_px = close_px.AAPL['2006':'2007']
ma60 = aapl_px.rolling(30, min_periods=20).mean()
ewma60 = aapl_px.ewm(span=30).mean()
ma60.plot(style='k--', label='Simple MA')
ewma60.plot(style='k-', label='EW MA')
plt.legend()
Out[171]:
<matplotlib.legend.Legend at 0x1a167319c10>

Binary Moving Window Functions

In [172]:
plt.figure()
Out[172]:
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
In [173]:
spx_px = close_px_all['SPX']
spx_rets = spx_px.pct_change()
returns = close_px.pct_change()
In [174]:
corr = returns.AAPL.rolling(125, min_periods=100).corr(spx_rets)
corr.plot()
Out[174]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a1688d4cd0>
In [175]:
plt.figure()
Out[175]:
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
In [176]:
corr = returns.rolling(125, min_periods=100).corr(spx_rets)
corr.plot()
Out[176]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a168ab6130>

User-Defined Moving Window Functions

In [177]:
plt.figure()
Out[177]:
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
In [178]:
from scipy.stats import percentileofscore
score_at_2percent = lambda x: percentileofscore(x, 0.02)
result = returns.AAPL.rolling(250).apply(score_at_2percent)
result.plot()
Out[178]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a167f8f460>

17.7) Time Series Analysis Project 1

In [257]:
# Importing the dataset
df = pd.read_csv('AirPassengers.csv')
In [258]:
df
Out[258]:
Month #Passengers
0 1949-01 112
1 1949-02 118
2 1949-03 132
3 1949-04 129
4 1949-05 121
... ... ...
139 1960-08 606
140 1960-09 508
141 1960-10 461
142 1960-11 390
143 1960-12 432

144 rows × 2 columns

In [259]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 144 entries, 0 to 143
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Month        144 non-null    object
 1   #Passengers  144 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 2.4+ KB
In [260]:
df.describe().T
Out[260]:
count mean std min 25% 50% 75% max
#Passengers 144.0 280.298611 119.966317 104.0 180.0 265.5 360.5 622.0
In [261]:
df.index
Out[261]:
RangeIndex(start=0, stop=144, step=1)
In [262]:
# Parse strings to datetime type
df['Month'] = pd.to_datetime(df['Month'], infer_datetime_format=True)  #convert from string to datetime
df = df.set_index('Month')
df
Out[262]:
#Passengers
Month
1949-01-01 112
1949-02-01 118
1949-03-01 132
1949-04-01 129
1949-05-01 121
... ...
1960-08-01 606
1960-09-01 508
1960-10-01 461
1960-11-01 390
1960-12-01 432

144 rows × 1 columns

In [263]:
df.columns
Out[263]:
Index(['#Passengers'], dtype='object')

17.7.1) EDA

In [264]:
# Checking the Stationarity
plt.plot(df);
As we can observe in the plot above, the mean seems to vary with time violating the Constant Mean property. We can also observe some seasonality as air trips can have seasonal bookings.
In [265]:
#Test whether Timeseries is Stationary or not

from statsmodels.tsa.stattools import adfuller

def test_stationarity(timeseries):
    
    #Determing rolling statistics
    rolmean = timeseries.rolling(window=12).mean()
    rolstd = timeseries.rolling(window=12).std()

    #Plot rolling statistics:
    print ('Rolling Statistics Test:')
    orig = plt.plot(timeseries, color='blue',label='Original')
    mean = plt.plot(rolmean, color='red', label='Rolling Mean')
    std = plt.plot(rolstd, color='black', label = 'Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show(block=False)
    
    #Perform Augmented-Dickey-Fuller (ADF) test:
    print ('Results of Augmented-Dickey-Fuller (ADF) Test:')
    
    dftest = adfuller(timeseries['#Passengers'], autolag='AIC')
    
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
    
    for key,value in dftest[4].items():
        dfoutput['Critical Value (%s)'%key] = value
    print (dfoutput)
    
    if dfoutput.loc['p-value'] < 0.05:
        print('\n>> Reject H0, the dataset can be stationary')
    else:
        print('\n>> Failed to Reject H0, the dataset can be non-stationary!')
    
    
test_stationarity(df)
Rolling Statistics Test:
Results of Augmented-Dickey-Fuller (ADF) Test:
Test Statistic                   0.815369
p-value                          0.991880
#Lags Used                      13.000000
Number of Observations Used    130.000000
Critical Value (1%)             -3.481682
Critical Value (5%)             -2.884042
Critical Value (10%)            -2.578770
dtype: float64

>> Failed to Reject H0, the dataset can be non-stationary!
From the above graph, we see that rolling mean has a trend component and rolling standard deviation is fairly constant with time. Critical values are no where close to the Test Statistics. Hence, we can say that our Time Series at the moment is not stationary!

17.7.2) Converting from Non-Stationary to Stationary dataset

The usual techniques include: 1) Log method 2) Subtracting Simple Rolling Averages 3) Subtracting Exponential Rolling Averages 4) Subtracting Previous values shift() 5) Seasonal decomposition We will apply each method and check for the best suitable one to remove non-stationarity.

Log method

In [270]:
df_log = np.log(df)
df_log.dropna(inplace=True)
test_stationarity(df_log)
Rolling Statistics Test:
Results of Augmented-Dickey-Fuller (ADF) Test:
Test Statistic                  -1.717017
p-value                          0.422367
#Lags Used                      13.000000
Number of Observations Used    130.000000
Critical Value (1%)             -3.481682
Critical Value (5%)             -2.884042
Critical Value (10%)            -2.578770
dtype: float64

>> Failed to Reject H0, the dataset can be non-stationary!

Subtracting Simple Rolling Averages

In [271]:
moving_avg = df_log.rolling(window=12).mean()

df_moving_avg = df_log - moving_avg

df_moving_avg.dropna(inplace=True)
test_stationarity(df_moving_avg)
Rolling Statistics Test:
Results of Augmented-Dickey-Fuller (ADF) Test:
Test Statistic                  -3.162908
p-value                          0.022235
#Lags Used                      13.000000
Number of Observations Used    119.000000
Critical Value (1%)             -3.486535
Critical Value (5%)             -2.886151
Critical Value (10%)            -2.579896
dtype: float64

>> Reject H0, the dataset can be stationary

Subtracting Exponential Rolling Averages

In [273]:
exp_moving_avg = df_log.ewm(halflife=12, min_periods=0, adjust=True).mean()

df_exp_moving_avg = df_log - exp_moving_avg

df_exp_moving_avg.dropna(inplace=True)
test_stationarity(df_exp_moving_avg)
Rolling Statistics Test:
Results of Augmented-Dickey-Fuller (ADF) Test:
Test Statistic                  -3.601262
p-value                          0.005737
#Lags Used                      13.000000
Number of Observations Used    130.000000
Critical Value (1%)             -3.481682
Critical Value (5%)             -2.884042
Critical Value (10%)            -2.578770
dtype: float64

>> Reject H0, the dataset can be stationary

Subtracting Previous values shift()

In [274]:
df_log_shift = df_log - df_log.shift()

df_log_shift.dropna(inplace=True)
test_stationarity(df_log_shift)
Rolling Statistics Test:
Results of Augmented-Dickey-Fuller (ADF) Test:
Test Statistic                  -2.717131
p-value                          0.071121
#Lags Used                      14.000000
Number of Observations Used    128.000000
Critical Value (1%)             -3.482501
Critical Value (5%)             -2.884398
Critical Value (10%)            -2.578960
dtype: float64

>> Failed to Reject H0, the dataset can be non-stationary!

17.8) Time Series Analysis Project 2

In [275]:
import statsmodels.api as sm
In [277]:
df = pd.read_excel("Sample - Superstore.xls")
furniture = df.loc[df['Category'] == 'Furniture']
In [278]:
df.describe()
Out[278]:
Row ID Postal Code Sales Quantity Discount Profit
count 9994.000000 9994.000000 9994.000000 9994.000000 9994.000000 9994.000000
mean 4997.500000 55190.379428 229.858001 3.789574 0.156203 28.656896
std 2885.163629 32063.693350 623.245101 2.225110 0.206452 234.260108
min 1.000000 1040.000000 0.444000 1.000000 0.000000 -6599.978000
25% 2499.250000 23223.000000 17.280000 2.000000 0.000000 1.728750
50% 4997.500000 56430.500000 54.490000 3.000000 0.200000 8.666500
75% 7495.750000 90008.000000 209.940000 5.000000 0.200000 29.364000
max 9994.000000 99301.000000 22638.480000 14.000000 0.800000 8399.976000
In [279]:
furniture['Order Date'].min(), furniture['Order Date'].max()
Out[279]:
(Timestamp('2014-01-06 00:00:00'), Timestamp('2017-12-30 00:00:00'))

17.8.1) Data Preprocessing

In [280]:
furniture.isnull().sum()
Out[280]:
Row ID          0
Order ID        0
Order Date      0
Ship Date       0
Ship Mode       0
               ..
Product Name    0
Sales           0
Quantity        0
Discount        0
Profit          0
Length: 21, dtype: int64
In [281]:
furniture = furniture.groupby('Order Date')['Sales'].sum().reset_index()
In [282]:
# Indexing with time series data
furniture = furniture.set_index('Order Date')
furniture.index
Out[282]:
DatetimeIndex(['2014-01-06', '2014-01-07', '2014-01-10', '2014-01-11',
               '2014-01-13', '2014-01-14', '2014-01-16', '2014-01-19',
               '2014-01-20', '2014-01-21',
               ...
               '2017-12-18', '2017-12-19', '2017-12-21', '2017-12-22',
               '2017-12-23', '2017-12-24', '2017-12-25', '2017-12-28',
               '2017-12-29', '2017-12-30'],
              dtype='datetime64[ns]', name='Order Date', length=889, freq=None)
Our current datetime data can be tricky to work with, therefore, we will use the averages daily sales value for that month instead, and we are using the start of each month as the timestamp.
In [283]:
y = furniture['Sales'].resample('MS').mean()
In [286]:
y['2017':]
Out[286]:
Order Date
2017-01-01     397.602133
2017-02-01     528.179800
2017-03-01     544.672240
2017-04-01     453.297905
2017-05-01     678.302328
2017-06-01     826.460291
2017-07-01     562.524857
2017-08-01     857.881889
2017-09-01    1209.508583
2017-10-01     875.362728
2017-11-01    1277.817759
2017-12-01    1256.298672
Freq: MS, Name: Sales, dtype: float64
In [287]:
y.plot(figsize=(15, 6))
plt.show()
We can also visualize our data using a method called time-series decomposition that allows us to decompose our time series into three distinct components: trend, seasonality, and noise.
In [288]:
from pylab import rcParams
rcParams['figure.figsize'] = 18, 8
decomposition = sm.tsa.seasonal_decompose(y, model='additive')
fig = decomposition.plot()
plt.show()

17.8.2) Time series forecasting with ARIMA

This step is parameter Selection for our furniture’s sales ARIMA Time Series Model. Our goal here is to use a “grid search” to find the optimal set of parameters that yields the best performance for our model.
In [292]:
import itertools
p = d = q = range(0, 2)
pdq = list(itertools.product(p, d, q))
seasonal_pdq = [(x[0], x[1], x[2], 12) for x in list(itertools.product(p, d, q))]

for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            mod = sm.tsa.statespace.SARIMAX(y,
                            
                                            order=param,
                                            seasonal_order=param_seasonal,
                                            enforce_stationarity=False,
                                            enforce_invertibility=False)
            results = mod.fit()
            print('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, results.aic));
        except:
            continue
ARIMA(0, 0, 0)x(0, 0, 0, 12)12 - AIC:769.0817523205915
C:\Users\Hrishikesh\anaconda3\lib\site-packages\statsmodels\base\model.py:567: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warn("Maximum Likelihood optimization failed to converge. "
ARIMA(0, 0, 0)x(0, 0, 1, 12)12 - AIC:1591.7771582834314
ARIMA(0, 0, 0)x(0, 1, 0, 12)12 - AIC:477.71701309202774
C:\Users\Hrishikesh\anaconda3\lib\site-packages\statsmodels\tsa\statespace\sarimax.py:866: UserWarning: Too few observations to estimate starting parameters for seasonal ARMA. All parameters except for variances will be set to zeros.
  warn('Too few observations to estimate starting parameters%s.'
ARIMA(0, 0, 0)x(0, 1, 1, 12)12 - AIC:302.2702899929659
ARIMA(0, 0, 0)x(1, 0, 0, 12)12 - AIC:497.2314433418337
C:\Users\Hrishikesh\anaconda3\lib\site-packages\statsmodels\base\model.py:567: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warn("Maximum Likelihood optimization failed to converge. "
ARIMA(0, 0, 0)x(1, 0, 1, 12)12 - AIC:1499.1878562443173
ARIMA(0, 0, 0)x(1, 1, 0, 12)12 - AIC:318.0047199116341
ARIMA(0, 0, 0)x(1, 1, 1, 12)12 - AIC:304.2488280367359
ARIMA(0, 0, 1)x(0, 0, 0, 12)12 - AIC:720.9252270758116
ARIMA(0, 0, 1)x(0, 0, 1, 12)12 - AIC:3198.689258185266
ARIMA(0, 0, 1)x(0, 1, 0, 12)12 - AIC:466.56074298091494
ARIMA(0, 0, 1)x(0, 1, 1, 12)12 - AIC:291.6261389673041
C:\Users\Hrishikesh\anaconda3\lib\site-packages\statsmodels\base\model.py:567: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warn("Maximum Likelihood optimization failed to converge. "
ARIMA(0, 0, 1)x(1, 0, 0, 12)12 - AIC:499.56169988236434
ARIMA(0, 0, 1)x(1, 0, 1, 12)12 - AIC:4041.5287808842295
ARIMA(0, 0, 1)x(1, 1, 0, 12)12 - AIC:319.9884876927767
ARIMA(0, 0, 1)x(1, 1, 1, 12)12 - AIC:291.87255765312216
ARIMA(0, 1, 0)x(0, 0, 0, 12)12 - AIC:677.894766843944
C:\Users\Hrishikesh\anaconda3\lib\site-packages\statsmodels\base\model.py:567: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warn("Maximum Likelihood optimization failed to converge. "
ARIMA(0, 1, 0)x(0, 0, 1, 12)12 - AIC:1467.8545262044947
ARIMA(0, 1, 0)x(0, 1, 0, 12)12 - AIC:486.63785671879367
ARIMA(0, 1, 0)x(0, 1, 1, 12)12 - AIC:304.96712281714764
ARIMA(0, 1, 0)x(1, 0, 0, 12)12 - AIC:497.78896630044073
C:\Users\Hrishikesh\anaconda3\lib\site-packages\statsmodels\base\model.py:567: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warn("Maximum Likelihood optimization failed to converge. "
ARIMA(0, 1, 0)x(1, 0, 1, 12)12 - AIC:565.6437722003604
ARIMA(0, 1, 0)x(1, 1, 0, 12)12 - AIC:319.7714068109211
ARIMA(0, 1, 0)x(1, 1, 1, 12)12 - AIC:306.9113200214979
ARIMA(0, 1, 1)x(0, 0, 0, 12)12 - AIC:649.9056176709448
ARIMA(0, 1, 1)x(0, 0, 1, 12)12 - AIC:2806.205531806133
ARIMA(0, 1, 1)x(0, 1, 0, 12)12 - AIC:458.8705548483661
ARIMA(0, 1, 1)x(0, 1, 1, 12)12 - AIC:279.5806231718541
ARIMA(0, 1, 1)x(1, 0, 0, 12)12 - AIC:486.18329774398234
C:\Users\Hrishikesh\anaconda3\lib\site-packages\statsmodels\base\model.py:567: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warn("Maximum Likelihood optimization failed to converge. "
ARIMA(0, 1, 1)x(1, 0, 1, 12)12 - AIC:2589.716764283984
ARIMA(0, 1, 1)x(1, 1, 0, 12)12 - AIC:310.75743684170055
ARIMA(0, 1, 1)x(1, 1, 1, 12)12 - AIC:281.5576621506635
ARIMA(1, 0, 0)x(0, 0, 0, 12)12 - AIC:692.1645522067712
C:\Users\Hrishikesh\anaconda3\lib\site-packages\statsmodels\base\model.py:567: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warn("Maximum Likelihood optimization failed to converge. "
ARIMA(1, 0, 0)x(0, 0, 1, 12)12 - AIC:1452.0540795066543
ARIMA(1, 0, 0)x(0, 1, 0, 12)12 - AIC:479.46321478521355
ARIMA(1, 0, 0)x(0, 1, 1, 12)12 - AIC:304.207767516517
ARIMA(1, 0, 0)x(1, 0, 0, 12)12 - AIC:480.9259367935388
C:\Users\Hrishikesh\anaconda3\lib\site-packages\statsmodels\base\model.py:567: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warn("Maximum Likelihood optimization failed to converge. "
ARIMA(1, 0, 0)x(1, 0, 1, 12)12 - AIC:1067.4093502173776
ARIMA(1, 0, 0)x(1, 1, 0, 12)12 - AIC:304.4664675065614
ARIMA(1, 0, 0)x(1, 1, 1, 12)12 - AIC:304.5842692186112
ARIMA(1, 0, 1)x(0, 0, 0, 12)12 - AIC:665.7794442579733
C:\Users\Hrishikesh\anaconda3\lib\site-packages\statsmodels\base\model.py:567: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warn("Maximum Likelihood optimization failed to converge. "
ARIMA(1, 0, 1)x(0, 0, 1, 12)12 - AIC:2686.744096307405
ARIMA(1, 0, 1)x(0, 1, 0, 12)12 - AIC:468.363798565863
ARIMA(1, 0, 1)x(0, 1, 1, 12)12 - AIC:293.342219496743
ARIMA(1, 0, 1)x(1, 0, 0, 12)12 - AIC:482.57633240703393
ARIMA(1, 0, 1)x(1, 0, 1, 12)12 - AIC:14102.23986735984
ARIMA(1, 0, 1)x(1, 1, 0, 12)12 - AIC:304.3753786388637
ARIMA(1, 0, 1)x(1, 1, 1, 12)12 - AIC:293.75131882972994
ARIMA(1, 1, 0)x(0, 0, 0, 12)12 - AIC:671.2513547541902
C:\Users\Hrishikesh\anaconda3\lib\site-packages\statsmodels\base\model.py:567: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warn("Maximum Likelihood optimization failed to converge. "
ARIMA(1, 1, 0)x(0, 0, 1, 12)12 - AIC:1465.6491592528278
ARIMA(1, 1, 0)x(0, 1, 0, 12)12 - AIC:479.20034222811347
ARIMA(1, 1, 0)x(0, 1, 1, 12)12 - AIC:300.2130611614424
ARIMA(1, 1, 0)x(1, 0, 0, 12)12 - AIC:475.34036585859496
ARIMA(1, 1, 0)x(1, 0, 1, 12)12 - AIC:2103.820538583254
ARIMA(1, 1, 0)x(1, 1, 0, 12)12 - AIC:300.62709013121264
ARIMA(1, 1, 0)x(1, 1, 1, 12)12 - AIC:302.3264992591895
ARIMA(1, 1, 1)x(0, 0, 0, 12)12 - AIC:649.0318019776258
ARIMA(1, 1, 1)x(0, 0, 1, 12)12 - AIC:2761.6168328417857
ARIMA(1, 1, 1)x(0, 1, 0, 12)12 - AIC:460.4762687397612
ARIMA(1, 1, 1)x(0, 1, 1, 12)12 - AIC:281.3873006930329
ARIMA(1, 1, 1)x(1, 0, 0, 12)12 - AIC:469.5250354658577
ARIMA(1, 1, 1)x(1, 0, 1, 12)12 - AIC:2622.5513169061505
ARIMA(1, 1, 1)x(1, 1, 0, 12)12 - AIC:297.787543985313
ARIMA(1, 1, 1)x(1, 1, 1, 12)12 - AIC:283.36610150596636
The above output suggests that SARIMAX(1, 1, 1)x(1, 1, 0, 12) yields the lowest AIC value of 297.78. Therefore we should consider this to be optimal option.

17.8.3) Fitting the ARIMA model

In [293]:
mod = sm.tsa.statespace.SARIMAX(y,
                                order=(1, 1, 1),
                                seasonal_order=(1, 1, 0, 12),
                                enforce_stationarity=False,
                                enforce_invertibility=False)
results = mod.fit()
print(results.summary().tables[1])
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.0146      0.342      0.043      0.966      -0.655       0.684
ma.L1         -1.0000      0.360     -2.781      0.005      -1.705      -0.295
ar.S.L12      -0.0253      0.042     -0.609      0.543      -0.107       0.056
sigma2      2.958e+04   1.22e-05   2.43e+09      0.000    2.96e+04    2.96e+04
==============================================================================
In [294]:
results.plot_diagnostics(figsize=(16, 8))
plt.show()

17.8.4) Validation

In [295]:
pred = results.get_prediction(start=pd.to_datetime('2017-01-01'), dynamic=False)
pred_ci = pred.conf_int()
ax = y['2014':].plot(label='observed')
pred.predicted_mean.plot(ax=ax, label='One-step ahead Forecast', alpha=.7, figsize=(14, 7))
ax.fill_between(pred_ci.index,
                pred_ci.iloc[:, 0],
                pred_ci.iloc[:, 1], color='k', alpha=.2)
ax.set_xlabel('Date')
ax.set_ylabel('Furniture Sales')
plt.legend()
plt.show()

**18) Natural Language Processing (NLP):-**

In [180]:
# Importing the required libraries:
import nltk
In [181]:
# Importing the dataset
# nltk.download_shell()
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 

**19) Deep Learning:-**

TensorFlow and Keras are the main libraries used for Deep Learninf and Neural Networks.
In [24]:
import tensorflow as tf

19.1) TensorFlow:

In [9]:
df = pd.read_csv('fake_reg.csv')
df.head()
Out[9]:
price feature1 feature2
0 461.527929 999.787558 999.766096
1 548.130011 998.861615 1001.042403
2 410.297162 1000.070267 998.844015
3 540.382220 999.952251 1000.440940
4 546.024553 1000.446011 1000.338531
In [11]:
df.describe().T
Out[11]:
count mean std min 25% 50% 75% max
price 1000.0 498.673029 93.785431 223.346793 433.025732 502.382117 564.921588 774.407854
feature1 1000.0 1000.014171 0.974018 997.058347 999.332068 1000.009915 1000.637580 1003.207934
feature2 1000.0 999.979847 0.948330 996.995651 999.316106 1000.002243 1000.645380 1002.666308
In [12]:
sns.pairplot(df);
In [13]:
X = df[['feature1', 'feature2']].values
y = df['price']
In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
In [18]:
sc = StandardScaler()
In [21]:
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)
In [23]:
X_train.max()
Out[23]:
3.346290096256661

Building a Model

We will create a basic Sequential model and then kee making it dense by adding layers.

Choosing an optimizer and loss

Keep in mind what kind of problem you are trying to solve:

# For a multi-class classification problem
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# For a binary classification problem
model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# For a mean squared error regression problem
model.compile(optimizer='rmsprop',
              loss='mse')
In [31]:
## Building a Neural Network Model
model_ = tf.keras.Sequential([tf.keras.layers.Dense(units=4, activation='relu'),
                    tf.keras.layers.Dense(units=2, activation='relu'),
                    tf.keras.layers.Dense(units=1)]) # First layer with 4 Neurons, nest with 2 and the final Output layer.



# Another way to do build such a model is:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
# Add Layers now:
model.add(Dense(units=4, activation='relu'))   # A Rectlinear Activation Function Layer with 4 Neurons
model.add(Dense(units=2, activation='relu'))   # A Rectlinear Activation Function Layer with 4 Neurons
model.add(Dense(units=1))   # Final Output Layer

model.compile(optimizer='rmsprop', loss='mse')

Training

Below are some common definitions that are necessary to know and understand to correctly utilize Keras:

  • Sample: one element of a dataset.
    • Example: one image is a sample in a convolutional network
    • Example: one audio file is a sample for a speech recognition model
  • Batch: a set of N samples. The samples in a batch are processed independently, in parallel. If training, a batch results in only one update to the model.A batch generally approximates the distribution of the input data better than a single input. The larger the batch, the better the approximation; however, it is also true that the batch will take longer to process and will still result in only one update. For inference (evaluate/predict), it is recommended to pick a batch size that is as large as you can afford without going out of memory (since larger batches will usually result in faster evaluation/prediction).
  • Epoch: an arbitrary cutoff, generally defined as "one pass over the entire dataset", used to separate training into distinct phases, which is useful for logging and periodic evaluation.
  • When using validation_data or validation_split with the fit method of Keras models, evaluation will be run at the end of every epoch.
  • Within Keras, there is the ability to add callbacks specifically designed to be run at the end of an epoch. Examples of these are learning rate changes and model checkpointing (saving).
In [32]:
model.fit(X_train,y_train,epochs=250)
Epoch 1/250
24/24 [==============================] - 2s 3ms/step - loss: 256398.4062
Epoch 2/250
24/24 [==============================] - 0s 1ms/step - loss: 256342.4219
Epoch 3/250
24/24 [==============================] - 0s 5ms/step - loss: 256283.2812
Epoch 4/250
24/24 [==============================] - 0s 1ms/step - loss: 256218.4062
Epoch 5/250
24/24 [==============================] - 0s 2ms/step - loss: 256147.6250
Epoch 6/250
24/24 [==============================] - 0s 2ms/step - loss: 256071.5312
Epoch 7/250
24/24 [==============================] - 0s 3ms/step - loss: 255991.0469
Epoch 8/250
24/24 [==============================] - 0s 1ms/step - loss: 255905.7500
Epoch 9/250
24/24 [==============================] - 0s 1ms/step - loss: 255813.8906
Epoch 10/250
24/24 [==============================] - 0s 1ms/step - loss: 255715.8750
Epoch 11/250
24/24 [==============================] - 0s 912us/step - loss: 255611.3281
Epoch 12/250
24/24 [==============================] - 0s 1ms/step - loss: 255498.6875
Epoch 13/250
24/24 [==============================] - 0s 1ms/step - loss: 255377.5312
Epoch 14/250
24/24 [==============================] - 0s 954us/step - loss: 255247.5938
Epoch 15/250
24/24 [==============================] - 0s 1ms/step - loss: 255109.5625
Epoch 16/250
24/24 [==============================] - 0s 954us/step - loss: 254962.2812
Epoch 17/250
24/24 [==============================] - 0s 997us/step - loss: 254806.9062
Epoch 18/250
24/24 [==============================] - 0s 2ms/step - loss: 254640.3438
Epoch 19/250
24/24 [==============================] - 0s 1ms/step - loss: 254463.5156
Epoch 20/250
24/24 [==============================] - 0s 2ms/step - loss: 254276.7500
Epoch 21/250
24/24 [==============================] - 0s 1ms/step - loss: 254078.4375
Epoch 22/250
24/24 [==============================] - 0s 1ms/step - loss: 253869.0312
Epoch 23/250
24/24 [==============================] - 0s 2ms/step - loss: 253649.9219
Epoch 24/250
24/24 [==============================] - 0s 1ms/step - loss: 253417.6562
Epoch 25/250
24/24 [==============================] - 0s 1ms/step - loss: 253171.5469
Epoch 26/250
24/24 [==============================] - 0s 3ms/step - loss: 252914.5000
Epoch 27/250
24/24 [==============================] - 0s 3ms/step - loss: 252644.8594
Epoch 28/250
24/24 [==============================] - 0s 3ms/step - loss: 252361.3281
Epoch 29/250
24/24 [==============================] - 0s 1ms/step - loss: 252065.1562
Epoch 30/250
24/24 [==============================] - 0s 1ms/step - loss: 251756.1875
Epoch 31/250
24/24 [==============================] - 0s 2ms/step - loss: 251434.9844
Epoch 32/250
24/24 [==============================] - 0s 2ms/step - loss: 251099.0312
Epoch 33/250
24/24 [==============================] - 0s 3ms/step - loss: 250748.6719
Epoch 34/250
24/24 [==============================] - 0s 1ms/step - loss: 250383.1875
Epoch 35/250
24/24 [==============================] - 0s 3ms/step - loss: 250003.7344
Epoch 36/250
24/24 [==============================] - 0s 1ms/step - loss: 249609.1719
Epoch 37/250
24/24 [==============================] - 0s 1ms/step - loss: 249195.2812
Epoch 38/250
24/24 [==============================] - 0s 2ms/step - loss: 248766.2969
Epoch 39/250
24/24 [==============================] - 0s 1ms/step - loss: 248323.3438
Epoch 40/250
24/24 [==============================] - 0s 1ms/step - loss: 247862.4375
Epoch 41/250
24/24 [==============================] - 0s 1ms/step - loss: 247387.3906
Epoch 42/250
24/24 [==============================] - 0s 2ms/step - loss: 246895.1406
Epoch 43/250
24/24 [==============================] - 0s 1ms/step - loss: 246387.1094
Epoch 44/250
24/24 [==============================] - 0s 1ms/step - loss: 245859.8438
Epoch 45/250
24/24 [==============================] - 0s 1ms/step - loss: 245314.6094
Epoch 46/250
24/24 [==============================] - 0s 1ms/step - loss: 244757.2656
Epoch 47/250
24/24 [==============================] - 0s 1ms/step - loss: 244182.5312
Epoch 48/250
24/24 [==============================] - 0s 1ms/step - loss: 243586.6250
Epoch 49/250
24/24 [==============================] - 0s 1ms/step - loss: 242974.0781
Epoch 50/250
24/24 [==============================] - 0s 1ms/step - loss: 242342.6719
Epoch 51/250
24/24 [==============================] - 0s 2ms/step - loss: 241692.9844
Epoch 52/250
24/24 [==============================] - 0s 1ms/step - loss: 241022.9062
Epoch 53/250
24/24 [==============================] - 0s 1ms/step - loss: 240335.9844
Epoch 54/250
24/24 [==============================] - 0s 2ms/step - loss: 239629.9375
Epoch 55/250
24/24 [==============================] - 0s 1ms/step - loss: 238906.1562
Epoch 56/250
24/24 [==============================] - 0s 889us/step - loss: 238163.2500
Epoch 57/250
24/24 [==============================] - 0s 911us/step - loss: 237401.0938
Epoch 58/250
24/24 [==============================] - 0s 1ms/step - loss: 236618.9219
Epoch 59/250
24/24 [==============================] - 0s 1ms/step - loss: 235818.5312
Epoch 60/250
24/24 [==============================] - 0s 1ms/step - loss: 234990.7031
Epoch 61/250
24/24 [==============================] - 0s 1ms/step - loss: 234147.3281
Epoch 62/250
24/24 [==============================] - 0s 1ms/step - loss: 233290.3281
Epoch 63/250
24/24 [==============================] - 0s 1ms/step - loss: 232409.2188
Epoch 64/250
24/24 [==============================] - 0s 1ms/step - loss: 231511.2344
Epoch 65/250
24/24 [==============================] - 0s 1ms/step - loss: 230586.0312
Epoch 66/250
24/24 [==============================] - 0s 1ms/step - loss: 229639.0156
Epoch 67/250
24/24 [==============================] - 0s 1ms/step - loss: 228669.3906
Epoch 68/250
24/24 [==============================] - 0s 1ms/step - loss: 227689.9062
Epoch 69/250
24/24 [==============================] - 0s 824us/step - loss: 226691.0469
Epoch 70/250
24/24 [==============================] - 0s 889us/step - loss: 225660.7812
Epoch 71/250
24/24 [==============================] - 0s 857us/step - loss: 224605.2969
Epoch 72/250
24/24 [==============================] - 0s 819us/step - loss: 223536.0625
Epoch 73/250
24/24 [==============================] - 0s 867us/step - loss: 222444.0469
Epoch 74/250
24/24 [==============================] - 0s 997us/step - loss: 221333.7812
Epoch 75/250
24/24 [==============================] - 0s 824us/step - loss: 220206.2969
Epoch 76/250
24/24 [==============================] - 0s 780us/step - loss: 219051.7812
Epoch 77/250
24/24 [==============================] - 0s 824us/step - loss: 217881.4062
Epoch 78/250
24/24 [==============================] - 0s 902us/step - loss: 216683.1406
Epoch 79/250
24/24 [==============================] - 0s 830us/step - loss: 215463.7812
Epoch 80/250
24/24 [==============================] - 0s 824us/step - loss: 214222.0312
Epoch 81/250
24/24 [==============================] - 0s 759us/step - loss: 212963.4375
Epoch 82/250
24/24 [==============================] - 0s 911us/step - loss: 211683.7812
Epoch 83/250
24/24 [==============================] - 0s 834us/step - loss: 210380.1406
Epoch 84/250
24/24 [==============================] - 0s 954us/step - loss: 209056.9844
Epoch 85/250
24/24 [==============================] - 0s 781us/step - loss: 207707.7500
Epoch 86/250
24/24 [==============================] - 0s 1ms/step - loss: 206333.4688
Epoch 87/250
24/24 [==============================] - 0s 1ms/step - loss: 204939.3125
Epoch 88/250
24/24 [==============================] - 0s 2ms/step - loss: 203526.0938
Epoch 89/250
24/24 [==============================] - 0s 1ms/step - loss: 202085.7344
Epoch 90/250
24/24 [==============================] - 0s 1ms/step - loss: 200624.6875
Epoch 91/250
24/24 [==============================] - 0s 1ms/step - loss: 199143.0469
Epoch 92/250
24/24 [==============================] - 0s 1ms/step - loss: 197630.6562
Epoch 93/250
24/24 [==============================] - 0s 1ms/step - loss: 196093.9688
Epoch 94/250
24/24 [==============================] - 0s 2ms/step - loss: 194534.6406
Epoch 95/250
24/24 [==============================] - 0s 998us/step - loss: 192951.5156
Epoch 96/250
24/24 [==============================] - 0s 1ms/step - loss: 191356.8438
Epoch 97/250
24/24 [==============================] - 0s 1ms/step - loss: 189736.8125
Epoch 98/250
24/24 [==============================] - 0s 1ms/step - loss: 188090.6719
Epoch 99/250
24/24 [==============================] - 0s 1ms/step - loss: 186429.1406
Epoch 100/250
24/24 [==============================] - 0s 1ms/step - loss: 184734.5469
Epoch 101/250
24/24 [==============================] - 0s 1ms/step - loss: 183016.6406
Epoch 102/250
24/24 [==============================] - ETA: 0s - loss: 178623.35 - 0s 1ms/step - loss: 181269.6562
Epoch 103/250
24/24 [==============================] - 0s 1ms/step - loss: 179511.3750
Epoch 104/250
24/24 [==============================] - 0s 1ms/step - loss: 177717.6875
Epoch 105/250
24/24 [==============================] - 0s 2ms/step - loss: 175899.1094
Epoch 106/250
24/24 [==============================] - 0s 2ms/step - loss: 174065.5938
Epoch 107/250
24/24 [==============================] - 0s 2ms/step - loss: 172218.2344
Epoch 108/250
24/24 [==============================] - 0s 1ms/step - loss: 170336.0000
Epoch 109/250
24/24 [==============================] - 0s 2ms/step - loss: 168427.6875
Epoch 110/250
24/24 [==============================] - 0s 1ms/step - loss: 166506.0312
Epoch 111/250
24/24 [==============================] - 0s 2ms/step - loss: 164543.8438
Epoch 112/250
24/24 [==============================] - 0s 2ms/step - loss: 162577.3125
Epoch 113/250
24/24 [==============================] - 0s 1ms/step - loss: 160580.4219
Epoch 114/250
24/24 [==============================] - 0s 737us/step - loss: 158576.3906
Epoch 115/250
24/24 [==============================] - 0s 1ms/step - loss: 156545.5781
Epoch 116/250
24/24 [==============================] - 0s 1ms/step - loss: 154488.5469
Epoch 117/250
24/24 [==============================] - 0s 1ms/step - loss: 152410.2344
Epoch 118/250
24/24 [==============================] - 0s 1ms/step - loss: 150319.2500
Epoch 119/250
24/24 [==============================] - 0s 867us/step - loss: 148204.7344
Epoch 120/250
24/24 [==============================] - 0s 1ms/step - loss: 146071.7188
Epoch 121/250
24/24 [==============================] - 0s 954us/step - loss: 143927.4844
Epoch 122/250
24/24 [==============================] - 0s 824us/step - loss: 141767.7188
Epoch 123/250
24/24 [==============================] - 0s 1ms/step - loss: 139575.7188
Epoch 124/250
24/24 [==============================] - 0s 957us/step - loss: 137369.5781
Epoch 125/250
24/24 [==============================] - 0s 984us/step - loss: 135144.6562
Epoch 126/250
24/24 [==============================] - 0s 824us/step - loss: 132921.1875
Epoch 127/250
24/24 [==============================] - 0s 911us/step - loss: 130681.0859
Epoch 128/250
24/24 [==============================] - 0s 831us/step - loss: 128399.1172
Epoch 129/250
24/24 [==============================] - 0s 812us/step - loss: 126103.8047
Epoch 130/250
24/24 [==============================] - 0s 781us/step - loss: 123818.8672
Epoch 131/250
24/24 [==============================] - 0s 737us/step - loss: 121540.9141
Epoch 132/250
24/24 [==============================] - 0s 781us/step - loss: 119217.5703
Epoch 133/250
24/24 [==============================] - 0s 737us/step - loss: 116896.2969
Epoch 134/250
24/24 [==============================] - 0s 737us/step - loss: 114559.8594
Epoch 135/250
24/24 [==============================] - 0s 824us/step - loss: 112205.5469
Epoch 136/250
24/24 [==============================] - 0s 911us/step - loss: 109867.0078
Epoch 137/250
24/24 [==============================] - 0s 767us/step - loss: 107515.9922
Epoch 138/250
24/24 [==============================] - 0s 720us/step - loss: 105153.2500
Epoch 139/250
24/24 [==============================] - 0s 1ms/step - loss: 102754.3438
Epoch 140/250
24/24 [==============================] - 0s 737us/step - loss: 100371.0859
Epoch 141/250
24/24 [==============================] - 0s 737us/step - loss: 97983.7969
Epoch 142/250
24/24 [==============================] - 0s 867us/step - loss: 95585.3125
Epoch 143/250
24/24 [==============================] - 0s 885us/step - loss: 93202.7422
Epoch 144/250
24/24 [==============================] - 0s 867us/step - loss: 90823.1016
Epoch 145/250
24/24 [==============================] - 0s 824us/step - loss: 88446.7109
Epoch 146/250
24/24 [==============================] - 0s 867us/step - loss: 86056.4531
Epoch 147/250
24/24 [==============================] - 0s 867us/step - loss: 83683.5078
Epoch 148/250
24/24 [==============================] - 0s 910us/step - loss: 81314.5391
Epoch 149/250
24/24 [==============================] - 0s 773us/step - loss: 78984.5469
Epoch 150/250
24/24 [==============================] - 0s 851us/step - loss: 76638.7891
Epoch 151/250
24/24 [==============================] - 0s 964us/step - loss: 74287.8359
Epoch 152/250
24/24 [==============================] - 0s 917us/step - loss: 71960.0234
Epoch 153/250
24/24 [==============================] - 0s 824us/step - loss: 69654.0078
Epoch 154/250
24/24 [==============================] - 0s 824us/step - loss: 67361.0156
Epoch 155/250
24/24 [==============================] - 0s 849us/step - loss: 65083.8555
Epoch 156/250
24/24 [==============================] - 0s 926us/step - loss: 62805.0703
Epoch 157/250
24/24 [==============================] - 0s 800us/step - loss: 60565.4883
Epoch 158/250
24/24 [==============================] - 0s 1ms/step - loss: 58334.6719
Epoch 159/250
24/24 [==============================] - 0s 824us/step - loss: 56098.3984
Epoch 160/250
24/24 [==============================] - 0s 911us/step - loss: 53909.1094
Epoch 161/250
24/24 [==============================] - 0s 910us/step - loss: 51762.8594
Epoch 162/250
24/24 [==============================] - 0s 824us/step - loss: 49625.7812
Epoch 163/250
24/24 [==============================] - 0s 869us/step - loss: 47520.8164
Epoch 164/250
24/24 [==============================] - 0s 853us/step - loss: 45396.0625
Epoch 165/250
24/24 [==============================] - 0s 867us/step - loss: 43321.0625
Epoch 166/250
24/24 [==============================] - 0s 824us/step - loss: 41281.5586
Epoch 167/250
24/24 [==============================] - 0s 892us/step - loss: 39264.3242
Epoch 168/250
24/24 [==============================] - 0s 878us/step - loss: 37256.5039
Epoch 169/250
24/24 [==============================] - 0s 909us/step - loss: 35334.1016
Epoch 170/250
24/24 [==============================] - 0s 805us/step - loss: 33425.5469
Epoch 171/250
24/24 [==============================] - 0s 781us/step - loss: 31503.3223
Epoch 172/250
24/24 [==============================] - 0s 824us/step - loss: 29659.9414
Epoch 173/250
24/24 [==============================] - 0s 867us/step - loss: 27871.5332
Epoch 174/250
24/24 [==============================] - 0s 1ms/step - loss: 26140.9492
Epoch 175/250
24/24 [==============================] - 0s 1ms/step - loss: 24416.6074
Epoch 176/250
24/24 [==============================] - 0s 989us/step - loss: 22728.5195
Epoch 177/250
24/24 [==============================] - 0s 845us/step - loss: 21092.8750
Epoch 178/250
24/24 [==============================] - 0s 730us/step - loss: 19510.5488
Epoch 179/250
24/24 [==============================] - 0s 911us/step - loss: 18006.8418
Epoch 180/250
24/24 [==============================] - 0s 867us/step - loss: 16559.4238
Epoch 181/250
24/24 [==============================] - 0s 850us/step - loss: 15186.8711
Epoch 182/250
24/24 [==============================] - 0s 771us/step - loss: 13873.5244
Epoch 183/250
24/24 [==============================] - 0s 810us/step - loss: 12604.2383
Epoch 184/250
24/24 [==============================] - 0s 822us/step - loss: 11422.0195
Epoch 185/250
24/24 [==============================] - 0s 747us/step - loss: 10290.5127
Epoch 186/250
24/24 [==============================] - 0s 839us/step - loss: 9241.3926
Epoch 187/250
24/24 [==============================] - 0s 867us/step - loss: 8288.5283
Epoch 188/250
24/24 [==============================] - 0s 954us/step - loss: 7393.7388
Epoch 189/250
24/24 [==============================] - 0s 844us/step - loss: 6560.9312
Epoch 190/250
24/24 [==============================] - 0s 816us/step - loss: 5816.6045
Epoch 191/250
24/24 [==============================] - 0s 867us/step - loss: 5150.1802
Epoch 192/250
24/24 [==============================] - 0s 997us/step - loss: 4598.1621
Epoch 193/250
24/24 [==============================] - 0s 986us/step - loss: 4110.8638
Epoch 194/250
24/24 [==============================] - 0s 820us/step - loss: 3698.5842
Epoch 195/250
24/24 [==============================] - 0s 781us/step - loss: 3362.3948
Epoch 196/250
24/24 [==============================] - 0s 860us/step - loss: 3079.6223
Epoch 197/250
24/24 [==============================] - 0s 821us/step - loss: 2838.4600
Epoch 198/250
24/24 [==============================] - 0s 1ms/step - loss: 2627.6238
Epoch 199/250
24/24 [==============================] - 0s 1ms/step - loss: 2439.5715
Epoch 200/250
24/24 [==============================] - 0s 1ms/step - loss: 2262.8105
Epoch 201/250
24/24 [==============================] - 0s 2ms/step - loss: 2099.8989
Epoch 202/250
24/24 [==============================] - 0s 1ms/step - loss: 1951.3390
Epoch 203/250
24/24 [==============================] - 0s 1ms/step - loss: 1815.5380
Epoch 204/250
24/24 [==============================] - 0s 1ms/step - loss: 1688.4412
Epoch 205/250
24/24 [==============================] - 0s 1ms/step - loss: 1571.3799
Epoch 206/250
24/24 [==============================] - 0s 867us/step - loss: 1462.3883
Epoch 207/250
24/24 [==============================] - 0s 953us/step - loss: 1362.6858
Epoch 208/250
24/24 [==============================] - 0s 1ms/step - loss: 1274.4720
Epoch 209/250
24/24 [==============================] - 0s 759us/step - loss: 1197.7026
Epoch 210/250
24/24 [==============================] - 0s 911us/step - loss: 1130.5548
Epoch 211/250
24/24 [==============================] - 0s 787us/step - loss: 1068.2472
Epoch 212/250
24/24 [==============================] - 0s 781us/step - loss: 1010.5568
Epoch 213/250
24/24 [==============================] - 0s 780us/step - loss: 961.0089
Epoch 214/250
24/24 [==============================] - 0s 831us/step - loss: 916.1057
Epoch 215/250
24/24 [==============================] - 0s 820us/step - loss: 877.5452
Epoch 216/250
24/24 [==============================] - 0s 824us/step - loss: 841.0746
Epoch 217/250
24/24 [==============================] - 0s 911us/step - loss: 808.5581
Epoch 218/250
24/24 [==============================] - 0s 748us/step - loss: 776.7404
Epoch 219/250
24/24 [==============================] - 0s 737us/step - loss: 744.0405
Epoch 220/250
24/24 [==============================] - 0s 737us/step - loss: 714.0671
Epoch 221/250
24/24 [==============================] - 0s 824us/step - loss: 688.1179
Epoch 222/250
24/24 [==============================] - 0s 954us/step - loss: 661.0629
Epoch 223/250
24/24 [==============================] - 0s 780us/step - loss: 634.5450
Epoch 224/250
24/24 [==============================] - 0s 788us/step - loss: 608.6077
Epoch 225/250
24/24 [==============================] - 0s 778us/step - loss: 586.6155
Epoch 226/250
24/24 [==============================] - 0s 781us/step - loss: 563.7560
Epoch 227/250
24/24 [==============================] - 0s 1ms/step - loss: 542.2021
Epoch 228/250
24/24 [==============================] - 0s 1ms/step - loss: 521.7055
Epoch 229/250
24/24 [==============================] - 0s 1ms/step - loss: 502.9783
Epoch 230/250
24/24 [==============================] - 0s 997us/step - loss: 483.8648
Epoch 231/250
24/24 [==============================] - 0s 997us/step - loss: 465.2363
Epoch 232/250
24/24 [==============================] - 0s 1ms/step - loss: 447.3253
Epoch 233/250
24/24 [==============================] - 0s 1ms/step - loss: 429.0987
Epoch 234/250
24/24 [==============================] - 0s 1ms/step - loss: 413.0371
Epoch 235/250
24/24 [==============================] - 0s 1ms/step - loss: 395.5634
Epoch 236/250
24/24 [==============================] - 0s 954us/step - loss: 378.7288
Epoch 237/250
24/24 [==============================] - 0s 954us/step - loss: 363.6688
Epoch 238/250
24/24 [==============================] - 0s 1ms/step - loss: 348.9963
Epoch 239/250
24/24 [==============================] - 0s 1ms/step - loss: 334.7599
Epoch 240/250
24/24 [==============================] - 0s 954us/step - loss: 321.8860
Epoch 241/250
24/24 [==============================] - 0s 954us/step - loss: 309.9259
Epoch 242/250
24/24 [==============================] - 0s 1ms/step - loss: 298.5580
Epoch 243/250
24/24 [==============================] - 0s 1ms/step - loss: 285.8920
Epoch 244/250
24/24 [==============================] - 0s 1ms/step - loss: 274.5003
Epoch 245/250
24/24 [==============================] - 0s 822us/step - loss: 264.5237
Epoch 246/250
24/24 [==============================] - 0s 905us/step - loss: 254.6700
Epoch 247/250
24/24 [==============================] - 0s 1ms/step - loss: 244.9643
Epoch 248/250
24/24 [==============================] - 0s 824us/step - loss: 235.3119
Epoch 249/250
24/24 [==============================] - 0s 911us/step - loss: 225.2330
Epoch 250/250
24/24 [==============================] - 0s 911us/step - loss: 215.9572
Out[32]:
<tensorflow.python.keras.callbacks.History at 0x1ca62efde50>

Evaluation

Let's evaluate our performance on our training set and our test set. We can compare these two performances to check for overfitting.

In [33]:
model.history.history
Out[33]:
{'loss': [256398.40625,
  256342.421875,
  256283.28125,
  256218.40625,
  256147.625,
  256071.53125,
  255991.046875,
  255905.75,
  255813.890625,
  255715.875,
  255611.328125,
  255498.6875,
  255377.53125,
  255247.59375,
  255109.5625,
  254962.28125,
  254806.90625,
  254640.34375,
  254463.515625,
  254276.75,
  254078.4375,
  253869.03125,
  253649.921875,
  253417.65625,
  253171.546875,
  252914.5,
  252644.859375,
  252361.328125,
  252065.15625,
  251756.1875,
  251434.984375,
  251099.03125,
  250748.671875,
  250383.1875,
  250003.734375,
  249609.171875,
  249195.28125,
  248766.296875,
  248323.34375,
  247862.4375,
  247387.390625,
  246895.140625,
  246387.109375,
  245859.84375,
  245314.609375,
  244757.265625,
  244182.53125,
  243586.625,
  242974.078125,
  242342.671875,
  241692.984375,
  241022.90625,
  240335.984375,
  239629.9375,
  238906.15625,
  238163.25,
  237401.09375,
  236618.921875,
  235818.53125,
  234990.703125,
  234147.328125,
  233290.328125,
  232409.21875,
  231511.234375,
  230586.03125,
  229639.015625,
  228669.390625,
  227689.90625,
  226691.046875,
  225660.78125,
  224605.296875,
  223536.0625,
  222444.046875,
  221333.78125,
  220206.296875,
  219051.78125,
  217881.40625,
  216683.140625,
  215463.78125,
  214222.03125,
  212963.4375,
  211683.78125,
  210380.140625,
  209056.984375,
  207707.75,
  206333.46875,
  204939.3125,
  203526.09375,
  202085.734375,
  200624.6875,
  199143.046875,
  197630.65625,
  196093.96875,
  194534.640625,
  192951.515625,
  191356.84375,
  189736.8125,
  188090.671875,
  186429.140625,
  184734.546875,
  183016.640625,
  181269.65625,
  179511.375,
  177717.6875,
  175899.109375,
  174065.59375,
  172218.234375,
  170336.0,
  168427.6875,
  166506.03125,
  164543.84375,
  162577.3125,
  160580.421875,
  158576.390625,
  156545.578125,
  154488.546875,
  152410.234375,
  150319.25,
  148204.734375,
  146071.71875,
  143927.484375,
  141767.71875,
  139575.71875,
  137369.578125,
  135144.65625,
  132921.1875,
  130681.0859375,
  128399.1171875,
  126103.8046875,
  123818.8671875,
  121540.9140625,
  119217.5703125,
  116896.296875,
  114559.859375,
  112205.546875,
  109867.0078125,
  107515.9921875,
  105153.25,
  102754.34375,
  100371.0859375,
  97983.796875,
  95585.3125,
  93202.7421875,
  90823.1015625,
  88446.7109375,
  86056.453125,
  83683.5078125,
  81314.5390625,
  78984.546875,
  76638.7890625,
  74287.8359375,
  71960.0234375,
  69654.0078125,
  67361.015625,
  65083.85546875,
  62805.0703125,
  60565.48828125,
  58334.671875,
  56098.3984375,
  53909.109375,
  51762.859375,
  49625.78125,
  47520.81640625,
  45396.0625,
  43321.0625,
  41281.55859375,
  39264.32421875,
  37256.50390625,
  35334.1015625,
  33425.546875,
  31503.322265625,
  29659.94140625,
  27871.533203125,
  26140.94921875,
  24416.607421875,
  22728.51953125,
  21092.875,
  19510.548828125,
  18006.841796875,
  16559.423828125,
  15186.87109375,
  13873.5244140625,
  12604.23828125,
  11422.01953125,
  10290.5126953125,
  9241.392578125,
  8288.5283203125,
  7393.73876953125,
  6560.93115234375,
  5816.6044921875,
  5150.18017578125,
  4598.162109375,
  4110.86376953125,
  3698.584228515625,
  3362.394775390625,
  3079.622314453125,
  2838.4599609375,
  2627.623779296875,
  2439.571533203125,
  2262.810546875,
  2099.89892578125,
  1951.3389892578125,
  1815.5379638671875,
  1688.441162109375,
  1571.3798828125,
  1462.3883056640625,
  1362.685791015625,
  1274.4720458984375,
  1197.70263671875,
  1130.5548095703125,
  1068.2471923828125,
  1010.5567626953125,
  961.0089111328125,
  916.1056518554688,
  877.5452270507812,
  841.0746459960938,
  808.55810546875,
  776.7404174804688,
  744.04052734375,
  714.067138671875,
  688.1178588867188,
  661.0629272460938,
  634.5449829101562,
  608.6077270507812,
  586.615478515625,
  563.7559814453125,
  542.2021484375,
  521.7055053710938,
  502.97833251953125,
  483.8647766113281,
  465.2362976074219,
  447.32525634765625,
  429.09869384765625,
  413.0370788574219,
  395.5634460449219,
  378.7287902832031,
  363.6687927246094,
  348.996337890625,
  334.7598876953125,
  321.885986328125,
  309.9258728027344,
  298.55804443359375,
  285.89202880859375,
  274.50030517578125,
  264.52374267578125,
  254.67001342773438,
  244.96429443359375,
  235.31192016601562,
  225.2329864501953,
  215.95716857910156]}
In [34]:
loss = model.history.history['loss']
In [35]:
sns.lineplot(x=range(len(loss)),y=loss)
plt.title("Training Loss per Epoch");

Compare final evaluation (MSE) on training set and test set.

In [36]:
model.metrics_names
Out[36]:
['loss']
In [38]:
# Loss on the Training data
model.evaluate(X_train, y_train)
24/24 [==============================] - 0s 800us/step - loss: 210.8674
Out[38]:
210.867431640625
In [37]:
# Loss on the Test data
model.evaluate(X_test, y_test)
8/8 [==============================] - 0s 1ms/step - loss: 308.6480
Out[37]:
308.6480407714844
In [40]:
test_predictions = model.predict(X_test)
test_predictions
Out[40]:
array([[397.48935],
       [619.2009 ],
       [586.89   ],
       [567.99805],
       [462.87958],
       [574.4009 ],
       [510.7125 ],
       [451.4669 ],
       [544.6207 ],
       [439.55515],
       [607.21716],
       [541.67206],
       [411.6527 ],
       [401.53928],
       [645.50024],
       [429.50897],
       [500.82532],
       [657.68176],
       [659.5127 ],
       [559.5751 ],
       [360.58017],
       [437.77023],
       [375.03494],
       [370.455  ],
       [561.4778 ],
       [606.9748 ],
       [526.86926],
       [420.2515 ],
       [652.6587 ],
       [405.72812],
       [434.74744],
       [478.5875 ],
       [431.0074 ],
       [679.23737],
       [416.11252],
       [410.8304 ],
       [493.77118],
       [544.8327 ],
       [504.95285],
       [454.61255],
       [613.7858 ],
       [408.49228],
       [599.485  ],
       [437.55313],
       [495.7562 ],
       [574.79034],
       [663.96356],
       [483.26437],
       [308.60168],
       [479.06714],
       [510.95724],
       [372.7893 ],
       [534.9612 ],
       [400.78683],
       [637.0069 ],
       [484.95706],
       [625.056  ],
       [621.2252 ],
       [439.825  ],
       [479.01907],
       [485.35147],
       [467.3692 ],
       [679.08716],
       [396.04724],
       [697.91846],
       [581.9571 ],
       [577.95966],
       [531.43713],
       [478.1205 ],
       [511.36343],
       [352.52286],
       [535.118  ],
       [565.8882 ],
       [520.8706 ],
       [446.38275],
       [524.8574 ],
       [499.99094],
       [437.09622],
       [538.0921 ],
       [635.5315 ],
       [460.82297],
       [561.6046 ],
       [686.31494],
       [452.3472 ],
       [706.9265 ],
       [466.12827],
       [394.18338],
       [580.3063 ],
       [428.0982 ],
       [481.84744],
       [612.26465],
       [431.39746],
       [449.2736 ],
       [428.25467],
       [501.77408],
       [604.31635],
       [312.63538],
       [428.5394 ],
       [530.20984],
       [513.42303],
       [601.5443 ],
       [519.8845 ],
       [324.25577],
       [569.8495 ],
       [475.9382 ],
       [555.43634],
       [507.13504],
       [383.61752],
       [562.6764 ],
       [446.19254],
       [441.91544],
       [636.762  ],
       [517.53766],
       [544.3651 ],
       [410.17813],
       [472.43533],
       [581.5906 ],
       [664.4999 ],
       [698.0543 ],
       [657.35706],
       [555.25446],
       [496.13202],
       [392.0068 ],
       [310.18484],
       [472.59402],
       [610.77454],
       [363.81473],
       [506.09186],
       [506.25027],
       [487.7639 ],
       [473.78232],
       [417.48212],
       [487.4301 ],
       [464.57184],
       [594.48334],
       [566.7683 ],
       [437.32535],
       [626.1538 ],
       [458.94397],
       [558.3478 ],
       [398.0182 ],
       [526.5585 ],
       [567.6498 ],
       [349.1348 ],
       [544.81744],
       [598.796  ],
       [376.56082],
       [537.61237],
       [557.216  ],
       [444.97305],
       [627.8872 ],
       [409.0452 ],
       [468.37454],
       [524.154  ],
       [363.46472],
       [455.2236 ],
       [429.91336],
       [492.34344],
       [336.3314 ],
       [386.6461 ],
       [599.4895 ],
       [500.07474],
       [462.0779 ],
       [483.2221 ],
       [528.26135],
       [336.08856],
       [526.6458 ],
       [240.06769],
       [497.82047],
       [536.9478 ],
       [483.17444],
       [462.5715 ],
       [384.39508],
       [407.09714],
       [544.5131 ],
       [470.1259 ],
       [575.82837],
       [484.44458],
       [597.80316],
       [540.6956 ],
       [535.7865 ],
       [494.17587],
       [642.24805],
       [555.3878 ],
       [571.21   ],
       [435.5852 ],
       [406.85968],
       [413.557  ],
       [563.0714 ],
       [604.9785 ],
       [429.7076 ],
       [481.08438],
       [583.96985],
       [520.3096 ],
       [349.16763],
       [642.02625],
       [522.92084],
       [328.33047],
       [486.421  ],
       [401.3674 ],
       [601.7257 ],
       [336.72415],
       [515.4561 ],
       [396.34763],
       [457.87622],
       [513.25653],
       [330.33963],
       [401.128  ],
       [570.1639 ],
       [407.94937],
       [546.11255],
       [515.6739 ],
       [548.5427 ],
       [315.29764],
       [425.29602],
       [598.6658 ],
       [614.2075 ],
       [600.53723],
       [559.7761 ],
       [467.67987],
       [453.3024 ],
       [502.50876],
       [437.9716 ],
       [503.939  ],
       [497.6603 ],
       [391.6959 ],
       [601.0234 ],
       [281.65158],
       [623.5381 ],
       [584.6872 ],
       [317.25723],
       [473.90268],
       [590.2423 ],
       [369.90683],
       [453.37985],
       [314.95096],
       [511.71536],
       [400.55045],
       [550.3893 ],
       [637.38715],
       [531.3621 ],
       [496.32132],
       [629.9408 ],
       [508.61826],
       [525.9441 ],
       [513.7231 ],
       [450.16367],
       [498.6346 ],
       [453.77164],
       [585.3307 ]], dtype=float32)

19.1) TensorFlow Regression:

The Data

We will be using data from a Kaggle data set:

https://www.kaggle.com/harlfoxem/housesalesprediction

Feature Columns

  • id - Unique ID for each home sold
  • date - Date of the home sale
  • price - Price of each home sold
  • bedrooms - Number of bedrooms
  • bathrooms - Number of bathrooms, where .5 accounts for a room with a toilet but no shower
  • sqft_living - Square footage of the apartments interior living space
  • sqft_lot - Square footage of the land space
  • floors - Number of floors
  • waterfront - A dummy variable for whether the apartment was overlooking the waterfront or not
  • view - An index from 0 to 4 of how good the view of the property was
  • condition - An index from 1 to 5 on the condition of the apartment,
  • grade - An index from 1 to 13, where 1-3 falls short of building construction and design, 7 has an average level of construction and design, and 11-13 have a high quality level of construction and design.
  • sqft_above - The square footage of the interior housing space that is above ground level
  • sqft_basement - The square footage of the interior housing space that is below ground level
  • yr_built - The year the house was initially built
  • yr_renovated - The year of the house’s last renovation
  • zipcode - What zipcode area the house is in
  • lat - Lattitude
  • long - Longitude
  • sqft_living15 - The square footage of interior housing living space for the nearest 15 neighbors
  • sqft_lot15 - The square footage of the land lots of the nearest 15 neighbors
In [45]:
df = pd.read_csv('kc_house_data.csv')
df
Out[45]:
id date price bedrooms bathrooms sqft_living sqft_lot floors waterfront view ... grade sqft_above sqft_basement yr_built yr_renovated zipcode lat long sqft_living15 sqft_lot15
0 7129300520 20141013T000000 221900.0 3 1.00 1180 5650 1.0 0 0 ... 7 1180 0 1955 0 98178 47.5112 -122.257 1340 5650
1 6414100192 20141209T000000 538000.0 3 2.25 2570 7242 2.0 0 0 ... 7 2170 400 1951 1991 98125 47.7210 -122.319 1690 7639
2 5631500400 20150225T000000 180000.0 2 1.00 770 10000 1.0 0 0 ... 6 770 0 1933 0 98028 47.7379 -122.233 2720 8062
3 2487200875 20141209T000000 604000.0 4 3.00 1960 5000 1.0 0 0 ... 7 1050 910 1965 0 98136 47.5208 -122.393 1360 5000
4 1954400510 20150218T000000 510000.0 3 2.00 1680 8080 1.0 0 0 ... 8 1680 0 1987 0 98074 47.6168 -122.045 1800 7503
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
21608 263000018 20140521T000000 360000.0 3 2.50 1530 1131 3.0 0 0 ... 8 1530 0 2009 0 98103 47.6993 -122.346 1530 1509
21609 6600060120 20150223T000000 400000.0 4 2.50 2310 5813 2.0 0 0 ... 8 2310 0 2014 0 98146 47.5107 -122.362 1830 7200
21610 1523300141 20140623T000000 402101.0 2 0.75 1020 1350 2.0 0 0 ... 7 1020 0 2009 0 98144 47.5944 -122.299 1020 2007
21611 291310100 20150116T000000 400000.0 3 2.50 1600 2388 2.0 0 0 ... 8 1600 0 2004 0 98027 47.5345 -122.069 1410 1287
21612 1523300157 20141015T000000 325000.0 2 0.75 1020 1076 2.0 0 0 ... 7 1020 0 2008 0 98144 47.5941 -122.299 1020 1357

21613 rows × 21 columns

In [47]:
df.describe().T
Out[47]:
count mean std min 25% 50% 75% max
id 21613.0 4.580302e+09 2.876566e+09 1.000102e+06 2.123049e+09 3.904930e+09 7.308900e+09 9.900000e+09
price 21613.0 5.400881e+05 3.671272e+05 7.500000e+04 3.219500e+05 4.500000e+05 6.450000e+05 7.700000e+06
bedrooms 21613.0 3.370842e+00 9.300618e-01 0.000000e+00 3.000000e+00 3.000000e+00 4.000000e+00 3.300000e+01
bathrooms 21613.0 2.114757e+00 7.701632e-01 0.000000e+00 1.750000e+00 2.250000e+00 2.500000e+00 8.000000e+00
sqft_living 21613.0 2.079900e+03 9.184409e+02 2.900000e+02 1.427000e+03 1.910000e+03 2.550000e+03 1.354000e+04
sqft_lot 21613.0 1.510697e+04 4.142051e+04 5.200000e+02 5.040000e+03 7.618000e+03 1.068800e+04 1.651359e+06
floors 21613.0 1.494309e+00 5.399889e-01 1.000000e+00 1.000000e+00 1.500000e+00 2.000000e+00 3.500000e+00
waterfront 21613.0 7.541757e-03 8.651720e-02 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00
view 21613.0 2.343034e-01 7.663176e-01 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 4.000000e+00
condition 21613.0 3.409430e+00 6.507430e-01 1.000000e+00 3.000000e+00 3.000000e+00 4.000000e+00 5.000000e+00
grade 21613.0 7.656873e+00 1.175459e+00 1.000000e+00 7.000000e+00 7.000000e+00 8.000000e+00 1.300000e+01
sqft_above 21613.0 1.788391e+03 8.280910e+02 2.900000e+02 1.190000e+03 1.560000e+03 2.210000e+03 9.410000e+03
sqft_basement 21613.0 2.915090e+02 4.425750e+02 0.000000e+00 0.000000e+00 0.000000e+00 5.600000e+02 4.820000e+03
yr_built 21613.0 1.971005e+03 2.937341e+01 1.900000e+03 1.951000e+03 1.975000e+03 1.997000e+03 2.015000e+03
yr_renovated 21613.0 8.440226e+01 4.016792e+02 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 2.015000e+03
zipcode 21613.0 9.807794e+04 5.350503e+01 9.800100e+04 9.803300e+04 9.806500e+04 9.811800e+04 9.819900e+04
lat 21613.0 4.756005e+01 1.385637e-01 4.715590e+01 4.747100e+01 4.757180e+01 4.767800e+01 4.777760e+01
long 21613.0 -1.222139e+02 1.408283e-01 -1.225190e+02 -1.223280e+02 -1.222300e+02 -1.221250e+02 -1.213150e+02
sqft_living15 21613.0 1.986552e+03 6.853913e+02 3.990000e+02 1.490000e+03 1.840000e+03 2.360000e+03 6.210000e+03
sqft_lot15 21613.0 1.276846e+04 2.730418e+04 6.510000e+02 5.100000e+03 7.620000e+03 1.008300e+04 8.712000e+05
In [49]:
sns.distplot(df['price']);
In [50]:
sns.countplot(df['bedrooms']);
In [52]:
plt.figure(figsize=(12,8))
sns.scatterplot(x='long',y='lat',
                data=df,hue='price',
                palette='RdYlGn',edgecolor=None,alpha=0.2);
In [53]:
non_top_1_perc = df.sort_values('price',ascending=False).iloc[216:]
In [54]:
df.drop('id', axis=1, inplace=True)
In [55]:
df['date'] = pd.to_datetime(df['date'])
In [56]:
df['year'] = df['date'].apply(lambda date: date.year)
In [57]:
df['month'] = df['date'].apply(lambda date: date.month)
In [24]:
sns.boxplot(x='year',y='price',data=df)
Out[24]:
<matplotlib.axes._subplots.AxesSubplot at 0x26e59114848>
In [25]:
sns.boxplot(x='month',y='price',data=df)
Out[25]:
<matplotlib.axes._subplots.AxesSubplot at 0x26e593b1f48>
In [26]:
df.groupby('month').mean()['price'].plot()
Out[26]:
<matplotlib.axes._subplots.AxesSubplot at 0x26e593bbc48>
In [27]:
df.groupby('year').mean()['price'].plot()
Out[27]:
<matplotlib.axes._subplots.AxesSubplot at 0x26e5959ed48>
In [58]:
df = df.drop('date',axis=1)
In [59]:
df.columns
Out[59]:
Index(['price', 'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors',
       'waterfront', 'view', 'condition', 'grade', 'sqft_above',
       'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode', 'lat', 'long',
       'sqft_living15', 'sqft_lot15', 'year', 'month'],
      dtype='object')
In [60]:
# https://i.pinimg.com/originals/4a/ab/31/4aab31ce95d5b8474fd2cc063f334178.jpg
# May be worth considering to remove this or feature engineer categories from it
df['zipcode'].value_counts()
Out[60]:
98103    602
98038    590
98115    583
98052    574
98117    553
        ... 
98102    105
98010    100
98024     81
98148     57
98039     50
Name: zipcode, Length: 70, dtype: int64
In [61]:
df = df.drop('zipcode',axis=1)
In [62]:
df.head()
Out[62]:
price bedrooms bathrooms sqft_living sqft_lot floors waterfront view condition grade sqft_above sqft_basement yr_built yr_renovated lat long sqft_living15 sqft_lot15 year month
0 221900.0 3 1.00 1180 5650 1.0 0 0 3 7 1180 0 1955 0 47.5112 -122.257 1340 5650 2014 10
1 538000.0 3 2.25 2570 7242 2.0 0 0 3 7 2170 400 1951 1991 47.7210 -122.319 1690 7639 2014 12
2 180000.0 2 1.00 770 10000 1.0 0 0 3 6 770 0 1933 0 47.7379 -122.233 2720 8062 2015 2
3 604000.0 4 3.00 1960 5000 1.0 0 0 5 7 1050 910 1965 0 47.5208 -122.393 1360 5000 2014 12
4 510000.0 3 2.00 1680 8080 1.0 0 0 3 8 1680 0 1987 0 47.6168 -122.045 1800 7503 2015 2
In [63]:
# could make sense due to scaling, higher should correlate to more value
df['yr_renovated'].value_counts()
Out[63]:
0       20699
2014       91
2013       37
2003       36
2000       35
        ...  
1934        1
1959        1
1951        1
1948        1
1944        1
Name: yr_renovated, Length: 70, dtype: int64
In [64]:
df['sqft_basement'].value_counts()
Out[64]:
0       13126
600       221
700       218
500       214
800       206
        ...  
792         1
2590        1
935         1
2390        1
248         1
Name: sqft_basement, Length: 306, dtype: int64

Scaling and Train Test Split

In [65]:
X = df.drop('price',axis=1)
y = df['price']
In [66]:
from sklearn.model_selection import train_test_split
In [67]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3,random_state=101)

Scaling

In [68]:
from sklearn.preprocessing import MinMaxScaler
In [69]:
scaler = MinMaxScaler()
In [70]:
X_train= scaler.fit_transform(X_train)
In [71]:
X_test = scaler.transform(X_test)
In [72]:
X_train.shape
Out[72]:
(15129, 19)
In [73]:
X_test.shape
Out[73]:
(6484, 19)

Creating a Model

In [74]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.optimizers import Adam
In [77]:
model = Sequential()

model.add(Dense(19, activation='relu'))
model.add(Dense(19, activation='relu'))
model.add(Dense(19, activation='relu'))
model.add(Dense(19, activation='relu'))

model.add(Dense(1))

model.compile(optimizer='adam', loss='mse')
In [80]:
model.fit(X_train, y_train, validation_data=(X_test, y_test), batch_size=128, epochs=400);
Epoch 1/400
119/119 [==============================] - 0s 2ms/step - loss: 98252677120.0000 - val_loss: 102843809792.0000
Epoch 2/400
119/119 [==============================] - 0s 2ms/step - loss: 97082482688.0000 - val_loss: 101559287808.0000
Epoch 3/400
119/119 [==============================] - 0s 2ms/step - loss: 95879061504.0000 - val_loss: 100199874560.0000
Epoch 4/400
119/119 [==============================] - 0s 2ms/step - loss: 94591205376.0000 - val_loss: 98803859456.0000
Epoch 5/400
119/119 [==============================] - 0s 2ms/step - loss: 93267623936.0000 - val_loss: 97331806208.0000
Epoch 6/400
119/119 [==============================] - 0s 2ms/step - loss: 91855912960.0000 - val_loss: 95815475200.0000
Epoch 7/400
119/119 [==============================] - 0s 2ms/step - loss: 90381139968.0000 - val_loss: 94176362496.0000
Epoch 8/400
119/119 [==============================] - 0s 2ms/step - loss: 88846516224.0000 - val_loss: 92513419264.0000
Epoch 9/400
119/119 [==============================] - 0s 2ms/step - loss: 87224991744.0000 - val_loss: 90759561216.0000
Epoch 10/400
119/119 [==============================] - 0s 2ms/step - loss: 85555134464.0000 - val_loss: 88868610048.0000
Epoch 11/400
119/119 [==============================] - 0s 2ms/step - loss: 83796172800.0000 - val_loss: 86859309056.0000
Epoch 12/400
119/119 [==============================] - 0s 2ms/step - loss: 81903378432.0000 - val_loss: 84835205120.0000
Epoch 13/400
119/119 [==============================] - 1s 4ms/step - loss: 79931088896.0000 - val_loss: 82569224192.0000
Epoch 14/400
119/119 [==============================] - 0s 3ms/step - loss: 77897129984.0000 - val_loss: 80301760512.0000
Epoch 15/400
119/119 [==============================] - 0s 2ms/step - loss: 75716698112.0000 - val_loss: 77948968960.0000
Epoch 16/400
119/119 [==============================] - 0s 2ms/step - loss: 73504620544.0000 - val_loss: 75423694848.0000
Epoch 17/400
119/119 [==============================] - 0s 2ms/step - loss: 71200202752.0000 - val_loss: 72849252352.0000
Epoch 18/400
119/119 [==============================] - 0s 3ms/step - loss: 68821639168.0000 - val_loss: 70241640448.0000
Epoch 19/400
119/119 [==============================] - 0s 4ms/step - loss: 66463084544.0000 - val_loss: 67739426816.0000
Epoch 20/400
119/119 [==============================] - 0s 3ms/step - loss: 64178192384.0000 - val_loss: 65158737920.0000
Epoch 21/400
119/119 [==============================] - 0s 3ms/step - loss: 62019309568.0000 - val_loss: 62858145792.0000
Epoch 22/400
119/119 [==============================] - 0s 3ms/step - loss: 59966783488.0000 - val_loss: 60633796608.0000
Epoch 23/400
119/119 [==============================] - 0s 2ms/step - loss: 58085224448.0000 - val_loss: 58660360192.0000
Epoch 24/400
119/119 [==============================] - 0s 3ms/step - loss: 56500744192.0000 - val_loss: 57008553984.0000
Epoch 25/400
119/119 [==============================] - 1s 5ms/step - loss: 55154282496.0000 - val_loss: 55672721408.0000
Epoch 26/400
119/119 [==============================] - 1s 5ms/step - loss: 54037389312.0000 - val_loss: 54444769280.0000
Epoch 27/400
119/119 [==============================] - 0s 2ms/step - loss: 53085036544.0000 - val_loss: 53422350336.0000
Epoch 28/400
119/119 [==============================] - 0s 4ms/step - loss: 52277956608.0000 - val_loss: 52582604800.0000
Epoch 29/400
119/119 [==============================] - 0s 2ms/step - loss: 51642204160.0000 - val_loss: 51842035712.0000
Epoch 30/400
119/119 [==============================] - 0s 2ms/step - loss: 50965745664.0000 - val_loss: 51185795072.0000
Epoch 31/400
119/119 [==============================] - 0s 4ms/step - loss: 50372698112.0000 - val_loss: 50880647168.0000
Epoch 32/400
119/119 [==============================] - 0s 3ms/step - loss: 49943388160.0000 - val_loss: 50112737280.0000
Epoch 33/400
119/119 [==============================] - 0s 2ms/step - loss: 49436049408.0000 - val_loss: 49669738496.0000
Epoch 34/400
119/119 [==============================] - 0s 3ms/step - loss: 48996249600.0000 - val_loss: 49163497472.0000
Epoch 35/400
119/119 [==============================] - 0s 2ms/step - loss: 48513523712.0000 - val_loss: 48803028992.0000
Epoch 36/400
119/119 [==============================] - 0s 3ms/step - loss: 48110305280.0000 - val_loss: 48283103232.0000
Epoch 37/400
119/119 [==============================] - 0s 2ms/step - loss: 47684542464.0000 - val_loss: 47754338304.0000
Epoch 38/400
119/119 [==============================] - 0s 3ms/step - loss: 47210799104.0000 - val_loss: 47335505920.0000
Epoch 39/400
119/119 [==============================] - 0s 2ms/step - loss: 46830878720.0000 - val_loss: 46857658368.0000
Epoch 40/400
119/119 [==============================] - 0s 2ms/step - loss: 46351208448.0000 - val_loss: 46414856192.0000
Epoch 41/400
119/119 [==============================] - 0s 2ms/step - loss: 45993914368.0000 - val_loss: 46054137856.0000
Epoch 42/400
119/119 [==============================] - 0s 3ms/step - loss: 45621391360.0000 - val_loss: 45637648384.0000
Epoch 43/400
119/119 [==============================] - 0s 2ms/step - loss: 45169680384.0000 - val_loss: 45280100352.0000
Epoch 44/400
119/119 [==============================] - 0s 2ms/step - loss: 44836245504.0000 - val_loss: 44832145408.0000
Epoch 45/400
119/119 [==============================] - 0s 3ms/step - loss: 44438130688.0000 - val_loss: 44394598400.0000
Epoch 46/400
119/119 [==============================] - 0s 3ms/step - loss: 44073086976.0000 - val_loss: 44142383104.0000
Epoch 47/400
119/119 [==============================] - 0s 3ms/step - loss: 43656880128.0000 - val_loss: 43744256000.0000
Epoch 48/400
119/119 [==============================] - 0s 2ms/step - loss: 43412160512.0000 - val_loss: 43349524480.0000
Epoch 49/400
119/119 [==============================] - 0s 3ms/step - loss: 43113881600.0000 - val_loss: 43013279744.0000
Epoch 50/400
119/119 [==============================] - 0s 2ms/step - loss: 42783477760.0000 - val_loss: 42735288320.0000
Epoch 51/400
119/119 [==============================] - 1s 5ms/step - loss: 42523672576.0000 - val_loss: 42487480320.0000
Epoch 52/400
119/119 [==============================] - 0s 4ms/step - loss: 42252283904.0000 - val_loss: 42185310208.0000
Epoch 53/400
119/119 [==============================] - 0s 3ms/step - loss: 41997578240.0000 - val_loss: 41903431680.0000
Epoch 54/400
119/119 [==============================] - 0s 3ms/step - loss: 41749585920.0000 - val_loss: 41684312064.0000
Epoch 55/400
119/119 [==============================] - 1s 5ms/step - loss: 41555361792.0000 - val_loss: 41412526080.0000
Epoch 56/400
119/119 [==============================] - 0s 3ms/step - loss: 41247064064.0000 - val_loss: 41166675968.0000
Epoch 57/400
119/119 [==============================] - 0s 3ms/step - loss: 40982761472.0000 - val_loss: 40974667776.0000
Epoch 58/400
119/119 [==============================] - 0s 4ms/step - loss: 40801853440.0000 - val_loss: 40636112896.0000
Epoch 59/400
119/119 [==============================] - 0s 3ms/step - loss: 40592658432.0000 - val_loss: 40409096192.0000
Epoch 60/400
119/119 [==============================] - 0s 4ms/step - loss: 40371724288.0000 - val_loss: 40180121600.0000
Epoch 61/400
119/119 [==============================] - 1s 5ms/step - loss: 40090611712.0000 - val_loss: 39916965888.0000
Epoch 62/400
119/119 [==============================] - 0s 3ms/step - loss: 39903576064.0000 - val_loss: 39684390912.0000
Epoch 63/400
119/119 [==============================] - 0s 4ms/step - loss: 39662858240.0000 - val_loss: 39417053184.0000
Epoch 64/400
119/119 [==============================] - 0s 4ms/step - loss: 39420555264.0000 - val_loss: 39154307072.0000
Epoch 65/400
119/119 [==============================] - 1s 4ms/step - loss: 39190876160.0000 - val_loss: 38921142272.0000
Epoch 66/400
119/119 [==============================] - 0s 3ms/step - loss: 38960427008.0000 - val_loss: 38689755136.0000
Epoch 67/400
119/119 [==============================] - 0s 3ms/step - loss: 38825562112.0000 - val_loss: 38472323072.0000
Epoch 68/400
119/119 [==============================] - 0s 3ms/step - loss: 38568972288.0000 - val_loss: 38235025408.0000
Epoch 69/400
119/119 [==============================] - 0s 3ms/step - loss: 38353350656.0000 - val_loss: 38005420032.0000
Epoch 70/400
119/119 [==============================] - 0s 2ms/step - loss: 38206631936.0000 - val_loss: 37857890304.0000
Epoch 71/400
119/119 [==============================] - 0s 3ms/step - loss: 38079078400.0000 - val_loss: 37719302144.0000
Epoch 72/400
119/119 [==============================] - 0s 2ms/step - loss: 37891506176.0000 - val_loss: 37475233792.0000
Epoch 73/400
119/119 [==============================] - 0s 3ms/step - loss: 37684535296.0000 - val_loss: 37273468928.0000
Epoch 74/400
119/119 [==============================] - 0s 2ms/step - loss: 37521608704.0000 - val_loss: 37107748864.0000
Epoch 75/400
119/119 [==============================] - 0s 3ms/step - loss: 37355937792.0000 - val_loss: 36942970880.0000
Epoch 76/400
119/119 [==============================] - 0s 2ms/step - loss: 37254148096.0000 - val_loss: 36921208832.0000
Epoch 77/400
119/119 [==============================] - 0s 2ms/step - loss: 37105360896.0000 - val_loss: 36625977344.0000
Epoch 78/400
119/119 [==============================] - 0s 2ms/step - loss: 36940369920.0000 - val_loss: 36471099392.0000
Epoch 79/400
119/119 [==============================] - 0s 3ms/step - loss: 36846190592.0000 - val_loss: 36340346880.0000
Epoch 80/400
119/119 [==============================] - 0s 2ms/step - loss: 36655472640.0000 - val_loss: 36179304448.0000
Epoch 81/400
119/119 [==============================] - 0s 2ms/step - loss: 36551217152.0000 - val_loss: 36037902336.0000
Epoch 82/400
119/119 [==============================] - 0s 2ms/step - loss: 36420472832.0000 - val_loss: 35906625536.0000
Epoch 83/400
119/119 [==============================] - 0s 2ms/step - loss: 36292636672.0000 - val_loss: 35817431040.0000
Epoch 84/400
119/119 [==============================] - 0s 2ms/step - loss: 36183576576.0000 - val_loss: 35659718656.0000
Epoch 85/400
119/119 [==============================] - 0s 2ms/step - loss: 36067155968.0000 - val_loss: 35560325120.0000
Epoch 86/400
119/119 [==============================] - 0s 2ms/step - loss: 35990851584.0000 - val_loss: 35391868928.0000
Epoch 87/400
119/119 [==============================] - 0s 2ms/step - loss: 35862634496.0000 - val_loss: 35349745664.0000
Epoch 88/400
119/119 [==============================] - 0s 2ms/step - loss: 35783331840.0000 - val_loss: 35197865984.0000
Epoch 89/400
119/119 [==============================] - 0s 2ms/step - loss: 35669282816.0000 - val_loss: 35129036800.0000
Epoch 90/400
119/119 [==============================] - 0s 3ms/step - loss: 35597930496.0000 - val_loss: 35147124736.0000
Epoch 91/400
119/119 [==============================] - 0s 2ms/step - loss: 35546611712.0000 - val_loss: 34902376448.0000
Epoch 92/400
119/119 [==============================] - 0s 2ms/step - loss: 35431489536.0000 - val_loss: 34829021184.0000
Epoch 93/400
119/119 [==============================] - 0s 2ms/step - loss: 35317166080.0000 - val_loss: 34726346752.0000
Epoch 94/400
119/119 [==============================] - 0s 2ms/step - loss: 35254431744.0000 - val_loss: 34714284032.0000
Epoch 95/400
119/119 [==============================] - 0s 2ms/step - loss: 35211157504.0000 - val_loss: 34572357632.0000
Epoch 96/400
119/119 [==============================] - 0s 2ms/step - loss: 35124637696.0000 - val_loss: 34509131776.0000
Epoch 97/400
119/119 [==============================] - 0s 2ms/step - loss: 35016593408.0000 - val_loss: 34503294976.0000
Epoch 98/400
119/119 [==============================] - 0s 2ms/step - loss: 34976940032.0000 - val_loss: 34364399616.0000
Epoch 99/400
119/119 [==============================] - 0s 2ms/step - loss: 34896531456.0000 - val_loss: 34277867520.0000
Epoch 100/400
119/119 [==============================] - 0s 2ms/step - loss: 34817560576.0000 - val_loss: 34191187968.0000
Epoch 101/400
119/119 [==============================] - 0s 2ms/step - loss: 34843009024.0000 - val_loss: 34146144256.0000
Epoch 102/400
119/119 [==============================] - 0s 2ms/step - loss: 34710188032.0000 - val_loss: 34082760704.0000
Epoch 103/400
119/119 [==============================] - 0s 2ms/step - loss: 34651176960.0000 - val_loss: 34004588544.0000
Epoch 104/400
119/119 [==============================] - 0s 2ms/step - loss: 34553344000.0000 - val_loss: 34013466624.0000
Epoch 105/400
119/119 [==============================] - 0s 2ms/step - loss: 34541518848.0000 - val_loss: 33887729664.0000
Epoch 106/400
119/119 [==============================] - 0s 2ms/step - loss: 34465738752.0000 - val_loss: 33944627200.0000
Epoch 107/400
119/119 [==============================] - 0s 2ms/step - loss: 34443026432.0000 - val_loss: 33768243200.0000
Epoch 108/400
119/119 [==============================] - 0s 2ms/step - loss: 34378743808.0000 - val_loss: 33704605696.0000
Epoch 109/400
119/119 [==============================] - 0s 2ms/step - loss: 34321057792.0000 - val_loss: 33650724864.0000
Epoch 110/400
119/119 [==============================] - 0s 2ms/step - loss: 34279524352.0000 - val_loss: 33615306752.0000
Epoch 111/400
119/119 [==============================] - 0s 2ms/step - loss: 34238396416.0000 - val_loss: 33540061184.0000
Epoch 112/400
119/119 [==============================] - 0s 2ms/step - loss: 34197389312.0000 - val_loss: 33573140480.0000
Epoch 113/400
119/119 [==============================] - 0s 2ms/step - loss: 34126950400.0000 - val_loss: 33431166976.0000
Epoch 114/400
119/119 [==============================] - 0s 2ms/step - loss: 34073237504.0000 - val_loss: 33382787072.0000
Epoch 115/400
119/119 [==============================] - 0s 2ms/step - loss: 33999990784.0000 - val_loss: 33350285312.0000
Epoch 116/400
119/119 [==============================] - 0s 2ms/step - loss: 34003324928.0000 - val_loss: 33288867840.0000
Epoch 117/400
119/119 [==============================] - 0s 2ms/step - loss: 33928581120.0000 - val_loss: 33243269120.0000
Epoch 118/400
119/119 [==============================] - 0s 2ms/step - loss: 33912621056.0000 - val_loss: 33262385152.0000
Epoch 119/400
119/119 [==============================] - 0s 2ms/step - loss: 33874610176.0000 - val_loss: 33153574912.0000
Epoch 120/400
119/119 [==============================] - 0s 2ms/step - loss: 33826246656.0000 - val_loss: 33187438592.0000
Epoch 121/400
119/119 [==============================] - 0s 2ms/step - loss: 33771560960.0000 - val_loss: 33062502400.0000
Epoch 122/400
119/119 [==============================] - 0s 2ms/step - loss: 33744576512.0000 - val_loss: 33012494336.0000
Epoch 123/400
119/119 [==============================] - 0s 2ms/step - loss: 33677981696.0000 - val_loss: 32989126656.0000
Epoch 124/400
119/119 [==============================] - 0s 2ms/step - loss: 33634838528.0000 - val_loss: 32976128000.0000
Epoch 125/400
119/119 [==============================] - 0s 2ms/step - loss: 33631733760.0000 - val_loss: 32884756480.0000
Epoch 126/400
119/119 [==============================] - 0s 2ms/step - loss: 33576595456.0000 - val_loss: 32848285696.0000
Epoch 127/400
119/119 [==============================] - 0s 2ms/step - loss: 33534636032.0000 - val_loss: 32797564928.0000
Epoch 128/400
119/119 [==============================] - 0s 3ms/step - loss: 33492600832.0000 - val_loss: 32761845760.0000
Epoch 129/400
119/119 [==============================] - 0s 2ms/step - loss: 33454835712.0000 - val_loss: 32719552512.0000
Epoch 130/400
119/119 [==============================] - 0s 2ms/step - loss: 33410359296.0000 - val_loss: 32756185088.0000
Epoch 131/400
119/119 [==============================] - 0s 2ms/step - loss: 33393401856.0000 - val_loss: 32687765504.0000
Epoch 132/400
119/119 [==============================] - 0s 2ms/step - loss: 33325883392.0000 - val_loss: 32608745472.0000
Epoch 133/400
119/119 [==============================] - 0s 2ms/step - loss: 33303328768.0000 - val_loss: 32559261696.0000
Epoch 134/400
119/119 [==============================] - 0s 3ms/step - loss: 33243301888.0000 - val_loss: 32554422272.0000
Epoch 135/400
119/119 [==============================] - 0s 3ms/step - loss: 33290276864.0000 - val_loss: 32480698368.0000
Epoch 136/400
119/119 [==============================] - 0s 3ms/step - loss: 33202362368.0000 - val_loss: 32474253312.0000
Epoch 137/400
119/119 [==============================] - 0s 2ms/step - loss: 33163886592.0000 - val_loss: 32424343552.0000
Epoch 138/400
119/119 [==============================] - 0s 2ms/step - loss: 33120309248.0000 - val_loss: 32400771072.0000
Epoch 139/400
119/119 [==============================] - 0s 2ms/step - loss: 33099958272.0000 - val_loss: 32350461952.0000
Epoch 140/400
119/119 [==============================] - 0s 3ms/step - loss: 33043339264.0000 - val_loss: 32321658880.0000
Epoch 141/400
119/119 [==============================] - 0s 2ms/step - loss: 33009883136.0000 - val_loss: 32264622080.0000
Epoch 142/400
119/119 [==============================] - 0s 4ms/step - loss: 32986013696.0000 - val_loss: 32237584384.0000
Epoch 143/400
119/119 [==============================] - 0s 2ms/step - loss: 32953720832.0000 - val_loss: 32251627520.0000
Epoch 144/400
119/119 [==============================] - 0s 2ms/step - loss: 32896708608.0000 - val_loss: 32274018304.0000
Epoch 145/400
119/119 [==============================] - 0s 3ms/step - loss: 32883982336.0000 - val_loss: 32216741888.0000
Epoch 146/400
119/119 [==============================] - 0s 2ms/step - loss: 32859412480.0000 - val_loss: 32128712704.0000
Epoch 147/400
119/119 [==============================] - 0s 2ms/step - loss: 32820596736.0000 - val_loss: 32129744896.0000
Epoch 148/400
119/119 [==============================] - 0s 2ms/step - loss: 32798337024.0000 - val_loss: 32071829504.0000
Epoch 149/400
119/119 [==============================] - 0s 2ms/step - loss: 32763293696.0000 - val_loss: 32022712320.0000
Epoch 150/400
119/119 [==============================] - 0s 2ms/step - loss: 32728086528.0000 - val_loss: 32026810368.0000
Epoch 151/400
119/119 [==============================] - 0s 2ms/step - loss: 32717504512.0000 - val_loss: 31983104000.0000
Epoch 152/400
119/119 [==============================] - 0s 2ms/step - loss: 32660305920.0000 - val_loss: 31925057536.0000
Epoch 153/400
119/119 [==============================] - 0s 2ms/step - loss: 32629401600.0000 - val_loss: 31898859520.0000
Epoch 154/400
119/119 [==============================] - 0s 2ms/step - loss: 32596281344.0000 - val_loss: 31886501888.0000
Epoch 155/400
119/119 [==============================] - 0s 2ms/step - loss: 32565463040.0000 - val_loss: 31890579456.0000
Epoch 156/400
119/119 [==============================] - 0s 2ms/step - loss: 32580245504.0000 - val_loss: 31818248192.0000
Epoch 157/400
119/119 [==============================] - 0s 2ms/step - loss: 32497082368.0000 - val_loss: 31933827072.0000
Epoch 158/400
119/119 [==============================] - 0s 2ms/step - loss: 32534308864.0000 - val_loss: 31748782080.0000
Epoch 159/400
119/119 [==============================] - 0s 2ms/step - loss: 32431730688.0000 - val_loss: 31723143168.0000
Epoch 160/400
119/119 [==============================] - 0s 2ms/step - loss: 32473092096.0000 - val_loss: 31735203840.0000
Epoch 161/400
119/119 [==============================] - 0s 2ms/step - loss: 32384180224.0000 - val_loss: 31687649280.0000
Epoch 162/400
119/119 [==============================] - 0s 2ms/step - loss: 32363374592.0000 - val_loss: 31653134336.0000
Epoch 163/400
119/119 [==============================] - 0s 2ms/step - loss: 32321683456.0000 - val_loss: 31613431808.0000
Epoch 164/400
119/119 [==============================] - 0s 1ms/step - loss: 32304617472.0000 - val_loss: 31624499200.0000
Epoch 165/400
119/119 [==============================] - 0s 2ms/step - loss: 32309219328.0000 - val_loss: 31545509888.0000
Epoch 166/400
119/119 [==============================] - 0s 2ms/step - loss: 32237099008.0000 - val_loss: 31525203968.0000
Epoch 167/400
119/119 [==============================] - 0s 2ms/step - loss: 32227008512.0000 - val_loss: 31510292480.0000
Epoch 168/400
119/119 [==============================] - 0s 2ms/step - loss: 32194846720.0000 - val_loss: 31492947968.0000
Epoch 169/400
119/119 [==============================] - 0s 2ms/step - loss: 32144171008.0000 - val_loss: 31476590592.0000
Epoch 170/400
119/119 [==============================] - 0s 2ms/step - loss: 32137240576.0000 - val_loss: 31451402240.0000
Epoch 171/400
119/119 [==============================] - 0s 2ms/step - loss: 32100003840.0000 - val_loss: 31505903616.0000
Epoch 172/400
119/119 [==============================] - 0s 2ms/step - loss: 32042096640.0000 - val_loss: 31415840768.0000
Epoch 173/400
119/119 [==============================] - 0s 2ms/step - loss: 32058085376.0000 - val_loss: 31416909824.0000
Epoch 174/400
119/119 [==============================] - 0s 2ms/step - loss: 32109563904.0000 - val_loss: 31338139648.0000
Epoch 175/400
119/119 [==============================] - 0s 2ms/step - loss: 32029384704.0000 - val_loss: 31330113536.0000
Epoch 176/400
119/119 [==============================] - 0s 2ms/step - loss: 31987193856.0000 - val_loss: 31297937408.0000
Epoch 177/400
119/119 [==============================] - 0s 2ms/step - loss: 31982036992.0000 - val_loss: 31275925504.0000
Epoch 178/400
119/119 [==============================] - 0s 2ms/step - loss: 31947212800.0000 - val_loss: 31282339840.0000
Epoch 179/400
119/119 [==============================] - 0s 2ms/step - loss: 31934728192.0000 - val_loss: 31248271360.0000
Epoch 180/400
119/119 [==============================] - 0s 2ms/step - loss: 31923216384.0000 - val_loss: 31183337472.0000
Epoch 181/400
119/119 [==============================] - 0s 2ms/step - loss: 31860512768.0000 - val_loss: 31172564992.0000
Epoch 182/400
119/119 [==============================] - 0s 2ms/step - loss: 31840294912.0000 - val_loss: 31140763648.0000
Epoch 183/400
119/119 [==============================] - 0s 2ms/step - loss: 31827412992.0000 - val_loss: 31175725056.0000
Epoch 184/400
119/119 [==============================] - 0s 3ms/step - loss: 31777845248.0000 - val_loss: 31104690176.0000
Epoch 185/400
119/119 [==============================] - 0s 3ms/step - loss: 31764862976.0000 - val_loss: 31130746880.0000
Epoch 186/400
119/119 [==============================] - 0s 2ms/step - loss: 31788951552.0000 - val_loss: 31117387776.0000
Epoch 187/400
119/119 [==============================] - 0s 2ms/step - loss: 31722225664.0000 - val_loss: 31100901376.0000
Epoch 188/400
119/119 [==============================] - 0s 2ms/step - loss: 31702663168.0000 - val_loss: 31021967360.0000
Epoch 189/400
119/119 [==============================] - 0s 2ms/step - loss: 31656527872.0000 - val_loss: 30987390976.0000
Epoch 190/400
119/119 [==============================] - 0s 2ms/step - loss: 31661516800.0000 - val_loss: 31071299584.0000
Epoch 191/400
119/119 [==============================] - 0s 2ms/step - loss: 31638276096.0000 - val_loss: 30963714048.0000
Epoch 192/400
119/119 [==============================] - 0s 2ms/step - loss: 31628118016.0000 - val_loss: 30935365632.0000
Epoch 193/400
119/119 [==============================] - 0s 2ms/step - loss: 31590674432.0000 - val_loss: 31035744256.0000
Epoch 194/400
119/119 [==============================] - 0s 2ms/step - loss: 31602337792.0000 - val_loss: 30895675392.0000
Epoch 195/400
119/119 [==============================] - 0s 2ms/step - loss: 31544002560.0000 - val_loss: 30876235776.0000
Epoch 196/400
119/119 [==============================] - 0s 3ms/step - loss: 31555121152.0000 - val_loss: 30865324032.0000
Epoch 197/400
119/119 [==============================] - 0s 2ms/step - loss: 31516516352.0000 - val_loss: 30856943616.0000
Epoch 198/400
119/119 [==============================] - 1s 4ms/step - loss: 31473924096.0000 - val_loss: 30840924160.0000
Epoch 199/400
119/119 [==============================] - 0s 3ms/step - loss: 31473588224.0000 - val_loss: 30798948352.0000
Epoch 200/400
119/119 [==============================] - 0s 4ms/step - loss: 31460825088.0000 - val_loss: 30783692800.0000
Epoch 201/400
119/119 [==============================] - 0s 3ms/step - loss: 31451535360.0000 - val_loss: 30795284480.0000
Epoch 202/400
119/119 [==============================] - 0s 3ms/step - loss: 31441543168.0000 - val_loss: 30744735744.0000
Epoch 203/400
119/119 [==============================] - 0s 3ms/step - loss: 31364411392.0000 - val_loss: 30770571264.0000
Epoch 204/400
119/119 [==============================] - 0s 3ms/step - loss: 31356602368.0000 - val_loss: 30707499008.0000
Epoch 205/400
119/119 [==============================] - 0s 3ms/step - loss: 31364519936.0000 - val_loss: 30751225856.0000
Epoch 206/400
119/119 [==============================] - 0s 3ms/step - loss: 31358291968.0000 - val_loss: 30687416320.0000
Epoch 207/400
119/119 [==============================] - 0s 3ms/step - loss: 31304146944.0000 - val_loss: 30647478272.0000
Epoch 208/400
119/119 [==============================] - 0s 3ms/step - loss: 31281074176.0000 - val_loss: 30645841920.0000
Epoch 209/400
119/119 [==============================] - 0s 3ms/step - loss: 31284328448.0000 - val_loss: 30669416448.0000
Epoch 210/400
119/119 [==============================] - 0s 3ms/step - loss: 31274665984.0000 - val_loss: 30614890496.0000
Epoch 211/400
119/119 [==============================] - 0s 3ms/step - loss: 31238606848.0000 - val_loss: 30589186048.0000
Epoch 212/400
119/119 [==============================] - 0s 3ms/step - loss: 31248281600.0000 - val_loss: 30590009344.0000
Epoch 213/400
119/119 [==============================] - 0s 3ms/step - loss: 31219306496.0000 - val_loss: 30580811776.0000
Epoch 214/400
119/119 [==============================] - 0s 3ms/step - loss: 31167305728.0000 - val_loss: 30627573760.0000
Epoch 215/400
119/119 [==============================] - 0s 3ms/step - loss: 31172313088.0000 - val_loss: 30535315456.0000
Epoch 216/400
119/119 [==============================] - 0s 3ms/step - loss: 31133784064.0000 - val_loss: 30524624896.0000
Epoch 217/400
119/119 [==============================] - 0s 3ms/step - loss: 31125002240.0000 - val_loss: 30575290368.0000
Epoch 218/400
119/119 [==============================] - 0s 4ms/step - loss: 31163219968.0000 - val_loss: 30468065280.0000
Epoch 219/400
119/119 [==============================] - 0s 3ms/step - loss: 31107979264.0000 - val_loss: 30536583168.0000
Epoch 220/400
119/119 [==============================] - 0s 3ms/step - loss: 31087863808.0000 - val_loss: 30440943616.0000
Epoch 221/400
119/119 [==============================] - 0s 3ms/step - loss: 31137357824.0000 - val_loss: 30425612288.0000
Epoch 222/400
119/119 [==============================] - 0s 3ms/step - loss: 31038652416.0000 - val_loss: 30412822528.0000
Epoch 223/400
119/119 [==============================] - 0s 3ms/step - loss: 31030958080.0000 - val_loss: 30427172864.0000
Epoch 224/400
119/119 [==============================] - 0s 3ms/step - loss: 31003783168.0000 - val_loss: 30489767936.0000
Epoch 225/400
119/119 [==============================] - 0s 3ms/step - loss: 31004528640.0000 - val_loss: 30391279616.0000
Epoch 226/400
119/119 [==============================] - 0s 3ms/step - loss: 30958499840.0000 - val_loss: 30500489216.0000
Epoch 227/400
119/119 [==============================] - 0s 2ms/step - loss: 30973976576.0000 - val_loss: 30349404160.0000
Epoch 228/400
119/119 [==============================] - 0s 3ms/step - loss: 30992379904.0000 - val_loss: 30383708160.0000
Epoch 229/400
119/119 [==============================] - 0s 3ms/step - loss: 30937589760.0000 - val_loss: 30315198464.0000
Epoch 230/400
119/119 [==============================] - 0s 3ms/step - loss: 30923165696.0000 - val_loss: 30374492160.0000
Epoch 231/400
119/119 [==============================] - 0s 4ms/step - loss: 30906515456.0000 - val_loss: 30280523776.0000
Epoch 232/400
119/119 [==============================] - 0s 3ms/step - loss: 30885152768.0000 - val_loss: 30267158528.0000
Epoch 233/400
119/119 [==============================] - 1s 7ms/step - loss: 30884552704.0000 - val_loss: 30284253184.0000
Epoch 234/400
119/119 [==============================] - 0s 2ms/step - loss: 30900498432.0000 - val_loss: 30247860224.0000
Epoch 235/400
119/119 [==============================] - 0s 2ms/step - loss: 30850846720.0000 - val_loss: 30266695680.0000
Epoch 236/400
119/119 [==============================] - 0s 2ms/step - loss: 30836566016.0000 - val_loss: 30305064960.0000
Epoch 237/400
119/119 [==============================] - 0s 2ms/step - loss: 30820237312.0000 - val_loss: 30250868736.0000
Epoch 238/400
119/119 [==============================] - 0s 2ms/step - loss: 30809026560.0000 - val_loss: 30198843392.0000
Epoch 239/400
119/119 [==============================] - 0s 2ms/step - loss: 30784073728.0000 - val_loss: 30172841984.0000
Epoch 240/400
119/119 [==============================] - 0s 2ms/step - loss: 30841313280.0000 - val_loss: 30235625472.0000
Epoch 241/400
119/119 [==============================] - 0s 2ms/step - loss: 30743052288.0000 - val_loss: 30162497536.0000
Epoch 242/400
119/119 [==============================] - 0s 2ms/step - loss: 30717925376.0000 - val_loss: 30180208640.0000
Epoch 243/400
119/119 [==============================] - 0s 3ms/step - loss: 30755358720.0000 - val_loss: 30144915456.0000
Epoch 244/400
119/119 [==============================] - 1s 4ms/step - loss: 30715504640.0000 - val_loss: 30135973888.0000
Epoch 245/400
119/119 [==============================] - 0s 3ms/step - loss: 30737545216.0000 - val_loss: 30114854912.0000
Epoch 246/400
119/119 [==============================] - 0s 3ms/step - loss: 30693695488.0000 - val_loss: 30100457472.0000
Epoch 247/400
119/119 [==============================] - 0s 3ms/step - loss: 30693935104.0000 - val_loss: 30086078464.0000
Epoch 248/400
119/119 [==============================] - 0s 3ms/step - loss: 30712807424.0000 - val_loss: 30091954176.0000
Epoch 249/400
119/119 [==============================] - 0s 3ms/step - loss: 30638018560.0000 - val_loss: 30091864064.0000
Epoch 250/400
119/119 [==============================] - 0s 3ms/step - loss: 30624839680.0000 - val_loss: 30068338688.0000
Epoch 251/400
119/119 [==============================] - 0s 3ms/step - loss: 30610872320.0000 - val_loss: 30037604352.0000
Epoch 252/400
119/119 [==============================] - 0s 3ms/step - loss: 30628444160.0000 - val_loss: 30041765888.0000
Epoch 253/400
119/119 [==============================] - 0s 4ms/step - loss: 30626740224.0000 - val_loss: 30021224448.0000
Epoch 254/400
119/119 [==============================] - 1s 5ms/step - loss: 30619512832.0000 - val_loss: 30037592064.0000
Epoch 255/400
119/119 [==============================] - 1s 4ms/step - loss: 30556626944.0000 - val_loss: 29991151616.0000
Epoch 256/400
119/119 [==============================] - 0s 4ms/step - loss: 30608447488.0000 - val_loss: 29992007680.0000
Epoch 257/400
119/119 [==============================] - 0s 2ms/step - loss: 30570616832.0000 - val_loss: 29986486272.0000
Epoch 258/400
119/119 [==============================] - 1s 5ms/step - loss: 30537009152.0000 - val_loss: 29950138368.0000
Epoch 259/400
119/119 [==============================] - 0s 3ms/step - loss: 30525730816.0000 - val_loss: 29960347648.0000
Epoch 260/400
119/119 [==============================] - 0s 3ms/step - loss: 30509291520.0000 - val_loss: 29944317952.0000
Epoch 261/400
119/119 [==============================] - 0s 3ms/step - loss: 30469636096.0000 - val_loss: 30012168192.0000
Epoch 262/400
119/119 [==============================] - 0s 3ms/step - loss: 30479456256.0000 - val_loss: 29899534336.0000
Epoch 263/400
119/119 [==============================] - 0s 3ms/step - loss: 30449315840.0000 - val_loss: 29890535424.0000
Epoch 264/400
119/119 [==============================] - 0s 3ms/step - loss: 30443579392.0000 - val_loss: 29915701248.0000
Epoch 265/400
119/119 [==============================] - 0s 3ms/step - loss: 30432768000.0000 - val_loss: 29887713280.0000
Epoch 266/400
119/119 [==============================] - 0s 3ms/step - loss: 30413791232.0000 - val_loss: 29925728256.0000
Epoch 267/400
119/119 [==============================] - 0s 4ms/step - loss: 30432522240.0000 - val_loss: 29842352128.0000
Epoch 268/400
119/119 [==============================] - 0s 3ms/step - loss: 30426939392.0000 - val_loss: 29842505728.0000
Epoch 269/400
119/119 [==============================] - 1s 4ms/step - loss: 30376337408.0000 - val_loss: 29852465152.0000
Epoch 270/400
119/119 [==============================] - 0s 3ms/step - loss: 30356297728.0000 - val_loss: 29808670720.0000
Epoch 271/400
119/119 [==============================] - 0s 3ms/step - loss: 30356895744.0000 - val_loss: 29807869952.0000
Epoch 272/400
119/119 [==============================] - 1s 4ms/step - loss: 30331305984.0000 - val_loss: 29788784640.0000
Epoch 273/400
119/119 [==============================] - ETA: 0s - loss: 30417270784.000 - 0s 4ms/step - loss: 30332272640.0000 - val_loss: 29776181248.0000
Epoch 274/400
119/119 [==============================] - 0s 3ms/step - loss: 30285924352.0000 - val_loss: 29770289152.0000
Epoch 275/400
119/119 [==============================] - 1s 4ms/step - loss: 30280701952.0000 - val_loss: 29789829120.0000
Epoch 276/400
119/119 [==============================] - 0s 3ms/step - loss: 30283372544.0000 - val_loss: 29748668416.0000
Epoch 277/400
119/119 [==============================] - 0s 3ms/step - loss: 30249682944.0000 - val_loss: 29724315648.0000
Epoch 278/400
119/119 [==============================] - 0s 3ms/step - loss: 30234900480.0000 - val_loss: 29826189312.0000
Epoch 279/400
119/119 [==============================] - 0s 3ms/step - loss: 30296311808.0000 - val_loss: 29785815040.0000
Epoch 280/400
119/119 [==============================] - 0s 3ms/step - loss: 30235301888.0000 - val_loss: 29688520704.0000
Epoch 281/400
119/119 [==============================] - 0s 4ms/step - loss: 30214301696.0000 - val_loss: 29667444736.0000
Epoch 282/400
119/119 [==============================] - 0s 4ms/step - loss: 30212599808.0000 - val_loss: 29670266880.0000
Epoch 283/400
119/119 [==============================] - 0s 3ms/step - loss: 30190520320.0000 - val_loss: 29656295424.0000
Epoch 284/400
119/119 [==============================] - 0s 4ms/step - loss: 30164799488.0000 - val_loss: 29635131392.0000
Epoch 285/400
119/119 [==============================] - 1s 5ms/step - loss: 30160461824.0000 - val_loss: 29660506112.0000
Epoch 286/400
119/119 [==============================] - 0s 2ms/step - loss: 30166710272.0000 - val_loss: 29614831616.0000
Epoch 287/400
119/119 [==============================] - 1s 5ms/step - loss: 30122403840.0000 - val_loss: 29729454080.0000
Epoch 288/400
119/119 [==============================] - 0s 3ms/step - loss: 30105661440.0000 - val_loss: 29626245120.0000
Epoch 289/400
119/119 [==============================] - 0s 3ms/step - loss: 30088974336.0000 - val_loss: 29587462144.0000
Epoch 290/400
119/119 [==============================] - 0s 3ms/step - loss: 30055092224.0000 - val_loss: 29548851200.0000
Epoch 291/400
119/119 [==============================] - 0s 3ms/step - loss: 30060736512.0000 - val_loss: 29543913472.0000
Epoch 292/400
119/119 [==============================] - 0s 3ms/step - loss: 30094405632.0000 - val_loss: 29531133952.0000
Epoch 293/400
119/119 [==============================] - 1s 4ms/step - loss: 30065412096.0000 - val_loss: 29617874944.0000
Epoch 294/400
119/119 [==============================] - 0s 2ms/step - loss: 30041219072.0000 - val_loss: 29526142976.0000
Epoch 295/400
119/119 [==============================] - 0s 4ms/step - loss: 30002601984.0000 - val_loss: 29562474496.0000
Epoch 296/400
119/119 [==============================] - 0s 4ms/step - loss: 30016264192.0000 - val_loss: 29508526080.0000
Epoch 297/400
119/119 [==============================] - 0s 3ms/step - loss: 30020546560.0000 - val_loss: 29476452352.0000
Epoch 298/400
119/119 [==============================] - 0s 3ms/step - loss: 29991282688.0000 - val_loss: 29475229696.0000
Epoch 299/400
119/119 [==============================] - 0s 4ms/step - loss: 29968281600.0000 - val_loss: 29472118784.0000
Epoch 300/400
119/119 [==============================] - 0s 3ms/step - loss: 29974036480.0000 - val_loss: 29469710336.0000
Epoch 301/400
119/119 [==============================] - 0s 3ms/step - loss: 29910368256.0000 - val_loss: 29451251712.0000
Epoch 302/400
119/119 [==============================] - 0s 3ms/step - loss: 29955813376.0000 - val_loss: 29459906560.0000
Epoch 303/400
119/119 [==============================] - 0s 3ms/step - loss: 29902188544.0000 - val_loss: 29447950336.0000
Epoch 304/400
119/119 [==============================] - 0s 4ms/step - loss: 29906548736.0000 - val_loss: 29431736320.0000
Epoch 305/400
119/119 [==============================] - 0s 3ms/step - loss: 29871499264.0000 - val_loss: 29407787008.0000
Epoch 306/400
119/119 [==============================] - 0s 3ms/step - loss: 29875085312.0000 - val_loss: 29465485312.0000
Epoch 307/400
119/119 [==============================] - 1s 5ms/step - loss: 29855019008.0000 - val_loss: 29413939200.0000
Epoch 308/400
119/119 [==============================] - 0s 3ms/step - loss: 29835175936.0000 - val_loss: 29368084480.0000
Epoch 309/400
119/119 [==============================] - 1s 6ms/step - loss: 29826215936.0000 - val_loss: 29386721280.0000
Epoch 310/400
119/119 [==============================] - 1s 5ms/step - loss: 29810931712.0000 - val_loss: 29413859328.0000
Epoch 311/400
119/119 [==============================] - 0s 3ms/step - loss: 29806565376.0000 - val_loss: 29403858944.0000
Epoch 312/400
119/119 [==============================] - 0s 2ms/step - loss: 29811363840.0000 - val_loss: 29325484032.0000
Epoch 313/400
119/119 [==============================] - 0s 2ms/step - loss: 29778421760.0000 - val_loss: 29313019904.0000
Epoch 314/400
119/119 [==============================] - 0s 2ms/step - loss: 29786396672.0000 - val_loss: 29303371776.0000
Epoch 315/400
119/119 [==============================] - 0s 2ms/step - loss: 29743513600.0000 - val_loss: 29290694656.0000
Epoch 316/400
119/119 [==============================] - 0s 2ms/step - loss: 29732225024.0000 - val_loss: 29287610368.0000
Epoch 317/400
119/119 [==============================] - 0s 2ms/step - loss: 29769721856.0000 - val_loss: 29314052096.0000
Epoch 318/400
119/119 [==============================] - 0s 2ms/step - loss: 29720068096.0000 - val_loss: 29261705216.0000
Epoch 319/400
119/119 [==============================] - 0s 2ms/step - loss: 29730410496.0000 - val_loss: 29249241088.0000
Epoch 320/400
119/119 [==============================] - 0s 2ms/step - loss: 29668667392.0000 - val_loss: 29233524736.0000
Epoch 321/400
119/119 [==============================] - 0s 2ms/step - loss: 29680173056.0000 - val_loss: 29234944000.0000
Epoch 322/400
119/119 [==============================] - 0s 3ms/step - loss: 29668790272.0000 - val_loss: 29226885120.0000
Epoch 323/400
119/119 [==============================] - 0s 2ms/step - loss: 29647038464.0000 - val_loss: 29224476672.0000
Epoch 324/400
119/119 [==============================] - 0s 2ms/step - loss: 29629278208.0000 - val_loss: 29206824960.0000
Epoch 325/400
119/119 [==============================] - 0s 3ms/step - loss: 29610950656.0000 - val_loss: 29186422784.0000
Epoch 326/400
119/119 [==============================] - 0s 3ms/step - loss: 29604001792.0000 - val_loss: 29194971136.0000
Epoch 327/400
119/119 [==============================] - 0s 2ms/step - loss: 29571964928.0000 - val_loss: 29167585280.0000
Epoch 328/400
119/119 [==============================] - 0s 3ms/step - loss: 29617528832.0000 - val_loss: 29157752832.0000
Epoch 329/400
119/119 [==============================] - 0s 3ms/step - loss: 29557833728.0000 - val_loss: 29152319488.0000
Epoch 330/400
119/119 [==============================] - 0s 2ms/step - loss: 29525528576.0000 - val_loss: 29162741760.0000
Epoch 331/400
119/119 [==============================] - 0s 3ms/step - loss: 29551966208.0000 - val_loss: 29314480128.0000
Epoch 332/400
119/119 [==============================] - 0s 2ms/step - loss: 29543952384.0000 - val_loss: 29212710912.0000
Epoch 333/400
119/119 [==============================] - 0s 3ms/step - loss: 29533034496.0000 - val_loss: 29134919680.0000
Epoch 334/400
119/119 [==============================] - 0s 3ms/step - loss: 29486706688.0000 - val_loss: 29112725504.0000
Epoch 335/400
119/119 [==============================] - 0s 3ms/step - loss: 29486309376.0000 - val_loss: 29103638528.0000
Epoch 336/400
119/119 [==============================] - 0s 3ms/step - loss: 29485340672.0000 - val_loss: 29085382656.0000
Epoch 337/400
119/119 [==============================] - 0s 3ms/step - loss: 29462235136.0000 - val_loss: 29138264064.0000
Epoch 338/400
119/119 [==============================] - 0s 3ms/step - loss: 29499086848.0000 - val_loss: 29210912768.0000
Epoch 339/400
119/119 [==============================] - 0s 3ms/step - loss: 29423529984.0000 - val_loss: 29055780864.0000
Epoch 340/400
119/119 [==============================] - 0s 3ms/step - loss: 29453139968.0000 - val_loss: 29059899392.0000
Epoch 341/400
119/119 [==============================] - 0s 3ms/step - loss: 29419008000.0000 - val_loss: 29115912192.0000
Epoch 342/400
119/119 [==============================] - 0s 2ms/step - loss: 29382805504.0000 - val_loss: 29103398912.0000
Epoch 343/400
119/119 [==============================] - 0s 2ms/step - loss: 29378510848.0000 - val_loss: 29205065728.0000
Epoch 344/400
119/119 [==============================] - 0s 2ms/step - loss: 29354448896.0000 - val_loss: 29008506880.0000
Epoch 345/400
119/119 [==============================] - 0s 2ms/step - loss: 29383704576.0000 - val_loss: 29007728640.0000
Epoch 346/400
119/119 [==============================] - 0s 2ms/step - loss: 29315977216.0000 - val_loss: 29141368832.0000
Epoch 347/400
119/119 [==============================] - 0s 2ms/step - loss: 29360957440.0000 - val_loss: 29004681216.0000
Epoch 348/400
119/119 [==============================] - 0s 3ms/step - loss: 29313665024.0000 - val_loss: 29012856832.0000
Epoch 349/400
119/119 [==============================] - 0s 2ms/step - loss: 29294669824.0000 - val_loss: 28955738112.0000
Epoch 350/400
119/119 [==============================] - 0s 2ms/step - loss: 29298395136.0000 - val_loss: 28940423168.0000
Epoch 351/400
119/119 [==============================] - 0s 2ms/step - loss: 29261918208.0000 - val_loss: 28921442304.0000
Epoch 352/400
119/119 [==============================] - 0s 2ms/step - loss: 29230716928.0000 - val_loss: 28925702144.0000
Epoch 353/400
119/119 [==============================] - 0s 2ms/step - loss: 29229205504.0000 - val_loss: 28922281984.0000
Epoch 354/400
119/119 [==============================] - 0s 2ms/step - loss: 29213802496.0000 - val_loss: 28903784448.0000
Epoch 355/400
119/119 [==============================] - 0s 2ms/step - loss: 29192828928.0000 - val_loss: 28887900160.0000
Epoch 356/400
119/119 [==============================] - 0s 2ms/step - loss: 29145554944.0000 - val_loss: 28933517312.0000
Epoch 357/400
119/119 [==============================] - 0s 2ms/step - loss: 29171238912.0000 - val_loss: 28905588736.0000
Epoch 358/400
119/119 [==============================] - 0s 2ms/step - loss: 29162258432.0000 - val_loss: 28853307392.0000
Epoch 359/400
119/119 [==============================] - 0s 2ms/step - loss: 29141501952.0000 - val_loss: 28852932608.0000
Epoch 360/400
119/119 [==============================] - 0s 2ms/step - loss: 29126854656.0000 - val_loss: 28812548096.0000
Epoch 361/400
119/119 [==============================] - 0s 2ms/step - loss: 29135802368.0000 - val_loss: 28850937856.0000
Epoch 362/400
119/119 [==============================] - 0s 2ms/step - loss: 29109504000.0000 - val_loss: 28783118336.0000
Epoch 363/400
119/119 [==============================] - 0s 2ms/step - loss: 29079392256.0000 - val_loss: 28786855936.0000
Epoch 364/400
119/119 [==============================] - 0s 2ms/step - loss: 29068566528.0000 - val_loss: 28755863552.0000
Epoch 365/400
119/119 [==============================] - 0s 2ms/step - loss: 29046767616.0000 - val_loss: 28755136512.0000
Epoch 366/400
119/119 [==============================] - 0s 2ms/step - loss: 29091940352.0000 - val_loss: 28744683520.0000
Epoch 367/400
119/119 [==============================] - 0s 2ms/step - loss: 29064003584.0000 - val_loss: 28744445952.0000
Epoch 368/400
119/119 [==============================] - 0s 2ms/step - loss: 28975960064.0000 - val_loss: 28726102016.0000
Epoch 369/400
119/119 [==============================] - 0s 2ms/step - loss: 29005242368.0000 - val_loss: 28709517312.0000
Epoch 370/400
119/119 [==============================] - 0s 2ms/step - loss: 29005494272.0000 - val_loss: 28676956160.0000
Epoch 371/400
119/119 [==============================] - 0s 2ms/step - loss: 28951513088.0000 - val_loss: 28696850432.0000
Epoch 372/400
119/119 [==============================] - 0s 2ms/step - loss: 28946186240.0000 - val_loss: 28657901568.0000
Epoch 373/400
119/119 [==============================] - 0s 2ms/step - loss: 28904017920.0000 - val_loss: 28638797824.0000
Epoch 374/400
119/119 [==============================] - 0s 2ms/step - loss: 28913391616.0000 - val_loss: 28645935104.0000
Epoch 375/400
119/119 [==============================] - 0s 2ms/step - loss: 28883474432.0000 - val_loss: 28612573184.0000
Epoch 376/400
119/119 [==============================] - 0s 2ms/step - loss: 28848177152.0000 - val_loss: 28630839296.0000
Epoch 377/400
119/119 [==============================] - 0s 3ms/step - loss: 28815708160.0000 - val_loss: 28588333056.0000
Epoch 378/400
119/119 [==============================] - 0s 2ms/step - loss: 28838383616.0000 - val_loss: 28606842880.0000
Epoch 379/400
119/119 [==============================] - 0s 2ms/step - loss: 28821850112.0000 - val_loss: 28594984960.0000
Epoch 380/400
119/119 [==============================] - 0s 2ms/step - loss: 28788594688.0000 - val_loss: 28573669376.0000
Epoch 381/400
119/119 [==============================] - 0s 2ms/step - loss: 28780359680.0000 - val_loss: 28560508928.0000
Epoch 382/400
119/119 [==============================] - 0s 2ms/step - loss: 28788803584.0000 - val_loss: 28558036992.0000
Epoch 383/400
119/119 [==============================] - 0s 2ms/step - loss: 28730036224.0000 - val_loss: 28653473792.0000
Epoch 384/400
119/119 [==============================] - 0s 2ms/step - loss: 28732454912.0000 - val_loss: 28484388864.0000
Epoch 385/400
119/119 [==============================] - 0s 2ms/step - loss: 28697399296.0000 - val_loss: 28484245504.0000
Epoch 386/400
119/119 [==============================] - 0s 2ms/step - loss: 28666468352.0000 - val_loss: 28478556160.0000
Epoch 387/400
119/119 [==============================] - 0s 2ms/step - loss: 28654766080.0000 - val_loss: 28511408128.0000
Epoch 388/400
119/119 [==============================] - 0s 2ms/step - loss: 28694114304.0000 - val_loss: 28442587136.0000
Epoch 389/400
119/119 [==============================] - 0s 2ms/step - loss: 28647395328.0000 - val_loss: 28517748736.0000
Epoch 390/400
119/119 [==============================] - 0s 2ms/step - loss: 28618108928.0000 - val_loss: 28562548736.0000
Epoch 391/400
119/119 [==============================] - 0s 2ms/step - loss: 28587141120.0000 - val_loss: 28427149312.0000
Epoch 392/400
119/119 [==============================] - 0s 2ms/step - loss: 28595707904.0000 - val_loss: 28393398272.0000
Epoch 393/400
119/119 [==============================] - 0s 2ms/step - loss: 28575948800.0000 - val_loss: 28389074944.0000
Epoch 394/400
119/119 [==============================] - 0s 2ms/step - loss: 28554334208.0000 - val_loss: 28384045056.0000
Epoch 395/400
119/119 [==============================] - 0s 2ms/step - loss: 28556212224.0000 - val_loss: 28463263744.0000
Epoch 396/400
119/119 [==============================] - 0s 2ms/step - loss: 28500111360.0000 - val_loss: 28361271296.0000
Epoch 397/400
119/119 [==============================] - 0s 2ms/step - loss: 28504750080.0000 - val_loss: 28363948032.0000
Epoch 398/400
119/119 [==============================] - 0s 2ms/step - loss: 28482848768.0000 - val_loss: 28319332352.0000
Epoch 399/400
119/119 [==============================] - 0s 2ms/step - loss: 28451512320.0000 - val_loss: 28296255488.0000
Epoch 400/400
119/119 [==============================] - 0s 2ms/step - loss: 28474619904.0000 - val_loss: 28299231232.0000
In [82]:
from sklearn.metrics import mean_squared_error,mean_absolute_error,explained_variance_score

Predicting on Brand New Data

In [81]:
X_test
Out[81]:
array([[0.18181818, 0.125     , 0.05735849, ..., 0.010289  , 1.        ,
        0.        ],
       [0.36363636, 0.28125   , 0.13207547, ..., 0.00909894, 1.        ,
        0.27272727],
       [0.36363636, 0.3125    , 0.16679245, ..., 0.00418246, 1.        ,
        0.09090909],
       ...,
       [0.27272727, 0.25      , 0.17584906, ..., 0.00821673, 0.        ,
        0.90909091],
       [0.18181818, 0.125     , 0.04603774, ..., 0.0044122 , 1.        ,
        0.27272727],
       [0.36363636, 0.4375    , 0.24226415, ..., 0.01018447, 0.        ,
        0.72727273]])
In [83]:
predictions = model.predict(X_test)
In [84]:
mean_absolute_error(y_test,predictions)
Out[84]:
103844.17806451458
In [85]:
np.sqrt(mean_squared_error(y_test,predictions))
Out[85]:
168223.76584903873
In [86]:
explained_variance_score(y_test,predictions)
Out[86]:
0.7983208164522808
In [87]:
df['price'].mean()
Out[87]:
540088.1417665294
In [88]:
df['price'].median()
Out[88]:
450000.0
In [89]:
# Our predictions
plt.scatter(y_test,predictions)

# Perfect predictions
plt.plot(y_test,y_test,'r')
Out[89]:
[<matplotlib.lines.Line2D at 0x1ca668bfc40>]
In [ ]:
errors = y_test.values.reshape(6480, 1) - predictions
In [91]:
sns.distplot(errors)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-91-4a151ca7bc24> in <module>
----> 1 sns.distplot(errors)

NameError: name 'errors' is not defined

Predicting on a brand new house

In [92]:
single_house = df.drop('price',axis=1).iloc[0]
In [93]:
single_house = scaler.transform(single_house.values.reshape(-1, 19))
In [94]:
single_house
Out[94]:
array([[0.27272727, 0.125     , 0.06716981, 0.00310751, 0.        ,
        0.        , 0.        , 0.5       , 0.5       , 0.09758772,
        0.        , 0.47826087, 0.        , 0.57149751, 0.21760797,
        0.16193426, 0.00573322, 0.        , 0.81818182]])
In [95]:
model.predict(single_house)
Out[95]:
array([[272835.8]], dtype=float32)
In [96]:
df.iloc[0]
Out[96]:
price            221900.0000
bedrooms              3.0000
bathrooms             1.0000
sqft_living        1180.0000
sqft_lot           5650.0000
floors                1.0000
waterfront            0.0000
view                  0.0000
condition             3.0000
grade                 7.0000
sqft_above         1180.0000
sqft_basement         0.0000
yr_built           1955.0000
yr_renovated          0.0000
lat                  47.5112
long               -122.2570
sqft_living15      1340.0000
sqft_lot15         5650.0000
year               2014.0000
month                10.0000
Name: 0, dtype: float64

Keras TF 2.0 - Code Along Classification Project

Let's explore a classification task with Keras API for TF 2.0

The Data

Breast cancer wisconsin (diagnostic) dataset


Data Set Characteristics:

:Number of Instances: 569

:Number of Attributes: 30 numeric, predictive attributes and the class

:Attribute Information:
    - radius (mean of distances from center to points on the perimeter)
    - texture (standard deviation of gray-scale values)
    - perimeter
    - area
    - smoothness (local variation in radius lengths)
    - compactness (perimeter^2 / area - 1.0)
    - concavity (severity of concave portions of the contour)
    - concave points (number of concave portions of the contour)
    - symmetry 
    - fractal dimension ("coastline approximation" - 1)

    The mean, standard error, and "worst" or largest (mean of the three
    largest values) of these features were computed for each image,
    resulting in 30 features.  For instance, field 3 is Mean Radius, field
    13 is Radius SE, field 23 is Worst Radius.

    - class:
            - WDBC-Malignant
            - WDBC-Benign

:Summary Statistics:

===================================== ====== ======
                                       Min    Max
===================================== ====== ======
radius (mean):                        6.981  28.11
texture (mean):                       9.71   39.28
perimeter (mean):                     43.79  188.5
area (mean):                          143.5  2501.0
smoothness (mean):                    0.053  0.163
compactness (mean):                   0.019  0.345
concavity (mean):                     0.0    0.427
concave points (mean):                0.0    0.201
symmetry (mean):                      0.106  0.304
fractal dimension (mean):             0.05   0.097
radius (standard error):              0.112  2.873
texture (standard error):             0.36   4.885
perimeter (standard error):           0.757  21.98
area (standard error):                6.802  542.2
smoothness (standard error):          0.002  0.031
compactness (standard error):         0.002  0.135
concavity (standard error):           0.0    0.396
concave points (standard error):      0.0    0.053
symmetry (standard error):            0.008  0.079
fractal dimension (standard error):   0.001  0.03
radius (worst):                       7.93   36.04
texture (worst):                      12.02  49.54
perimeter (worst):                    50.41  251.2
area (worst):                         185.2  4254.0
smoothness (worst):                   0.071  0.223
compactness (worst):                  0.027  1.058
concavity (worst):                    0.0    1.252
concave points (worst):               0.0    0.291
symmetry (worst):                     0.156  0.664
fractal dimension (worst):            0.055  0.208
===================================== ====== ======

:Missing Attribute Values: None

:Class Distribution: 212 - Malignant, 357 - Benign

:Creator:  Dr. William H. Wolberg, W. Nick Street, Olvi L. Mangasarian

:Donor: Nick Street

:Date: November, 1995

This is a copy of UCI ML Breast Cancer Wisconsin (Diagnostic) datasets. https://goo.gl/U2Uwz2

Features are computed from a digitized image of a fine needle aspirate (FNA) of a breast mass. They describe characteristics of the cell nuclei present in the image.

Separating plane described above was obtained using Multisurface Method-Tree (MSM-T) [K. P. Bennett, "Decision Tree Construction Via Linear Programming." Proceedings of the 4th Midwest Artificial Intelligence and Cognitive Science Society, pp. 97-101, 1992], a classification method which uses linear programming to construct a decision tree. Relevant features were selected using an exhaustive search in the space of 1-4 features and 1-3 separating planes.

The actual linear program used to obtain the separating plane in the 3-dimensional space is that described in: [K. P. Bennett and O. L. Mangasarian: "Robust Linear Programming Discrimination of Two Linearly Inseparable Sets", Optimization Methods and Software 1, 1992, 23-34].

This database is also available through the UW CS ftp server:

ftp ftp.cs.wisc.edu cd math-prog/cpo-dataset/machine-learn/WDBC/

.. topic:: References

  • W.N. Street, W.H. Wolberg and O.L. Mangasarian. Nuclear feature extraction for breast tumor diagnosis. IS&T/SPIE 1993 International Symposium on Electronic Imaging: Science and Technology, volume 1905, pages 861-870, San Jose, CA, 1993.
  • O.L. Mangasarian, W.N. Street and W.H. Wolberg. Breast cancer diagnosis and prognosis via linear programming. Operations Research, 43(4), pages 570-577, July-August 1995.
  • W.H. Wolberg, W.N. Street, and O.L. Mangasarian. Machine learning techniques to diagnose breast cancer from fine-needle aspirates. Cancer Letters 77 (1994) 163-171.
In [57]:
df = pd.read_csv('../DATA/cancer_classification.csv')
In [58]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 31 columns):
mean radius                569 non-null float64
mean texture               569 non-null float64
mean perimeter             569 non-null float64
mean area                  569 non-null float64
mean smoothness            569 non-null float64
mean compactness           569 non-null float64
mean concavity             569 non-null float64
mean concave points        569 non-null float64
mean symmetry              569 non-null float64
mean fractal dimension     569 non-null float64
radius error               569 non-null float64
texture error              569 non-null float64
perimeter error            569 non-null float64
area error                 569 non-null float64
smoothness error           569 non-null float64
compactness error          569 non-null float64
concavity error            569 non-null float64
concave points error       569 non-null float64
symmetry error             569 non-null float64
fractal dimension error    569 non-null float64
worst radius               569 non-null float64
worst texture              569 non-null float64
worst perimeter            569 non-null float64
worst area                 569 non-null float64
worst smoothness           569 non-null float64
worst compactness          569 non-null float64
worst concavity            569 non-null float64
worst concave points       569 non-null float64
worst symmetry             569 non-null float64
worst fractal dimension    569 non-null float64
benign_0__mal_1            569 non-null int64
dtypes: float64(30), int64(1)
memory usage: 137.9 KB
In [59]:
df.describe().transpose()
Out[59]:
count mean std min 25% 50% 75% max
mean radius 569.0 14.127292 3.524049 6.981000 11.700000 13.370000 15.780000 28.11000
mean texture 569.0 19.289649 4.301036 9.710000 16.170000 18.840000 21.800000 39.28000
mean perimeter 569.0 91.969033 24.298981 43.790000 75.170000 86.240000 104.100000 188.50000
mean area 569.0 654.889104 351.914129 143.500000 420.300000 551.100000 782.700000 2501.00000
mean smoothness 569.0 0.096360 0.014064 0.052630 0.086370 0.095870 0.105300 0.16340
mean compactness 569.0 0.104341 0.052813 0.019380 0.064920 0.092630 0.130400 0.34540
mean concavity 569.0 0.088799 0.079720 0.000000 0.029560 0.061540 0.130700 0.42680
mean concave points 569.0 0.048919 0.038803 0.000000 0.020310 0.033500 0.074000 0.20120
mean symmetry 569.0 0.181162 0.027414 0.106000 0.161900 0.179200 0.195700 0.30400
mean fractal dimension 569.0 0.062798 0.007060 0.049960 0.057700 0.061540 0.066120 0.09744
radius error 569.0 0.405172 0.277313 0.111500 0.232400 0.324200 0.478900 2.87300
texture error 569.0 1.216853 0.551648 0.360200 0.833900 1.108000 1.474000 4.88500
perimeter error 569.0 2.866059 2.021855 0.757000 1.606000 2.287000 3.357000 21.98000
area error 569.0 40.337079 45.491006 6.802000 17.850000 24.530000 45.190000 542.20000
smoothness error 569.0 0.007041 0.003003 0.001713 0.005169 0.006380 0.008146 0.03113
compactness error 569.0 0.025478 0.017908 0.002252 0.013080 0.020450 0.032450 0.13540
concavity error 569.0 0.031894 0.030186 0.000000 0.015090 0.025890 0.042050 0.39600
concave points error 569.0 0.011796 0.006170 0.000000 0.007638 0.010930 0.014710 0.05279
symmetry error 569.0 0.020542 0.008266 0.007882 0.015160 0.018730 0.023480 0.07895
fractal dimension error 569.0 0.003795 0.002646 0.000895 0.002248 0.003187 0.004558 0.02984
worst radius 569.0 16.269190 4.833242 7.930000 13.010000 14.970000 18.790000 36.04000
worst texture 569.0 25.677223 6.146258 12.020000 21.080000 25.410000 29.720000 49.54000
worst perimeter 569.0 107.261213 33.602542 50.410000 84.110000 97.660000 125.400000 251.20000
worst area 569.0 880.583128 569.356993 185.200000 515.300000 686.500000 1084.000000 4254.00000
worst smoothness 569.0 0.132369 0.022832 0.071170 0.116600 0.131300 0.146000 0.22260
worst compactness 569.0 0.254265 0.157336 0.027290 0.147200 0.211900 0.339100 1.05800
worst concavity 569.0 0.272188 0.208624 0.000000 0.114500 0.226700 0.382900 1.25200
worst concave points 569.0 0.114606 0.065732 0.000000 0.064930 0.099930 0.161400 0.29100
worst symmetry 569.0 0.290076 0.061867 0.156500 0.250400 0.282200 0.317900 0.66380
worst fractal dimension 569.0 0.083946 0.018061 0.055040 0.071460 0.080040 0.092080 0.20750
benign_0__mal_1 569.0 0.627417 0.483918 0.000000 0.000000 1.000000 1.000000 1.00000

EDA

In [60]:
import seaborn as sns
import matplotlib.pyplot as plt
In [62]:
sns.countplot(x='benign_0__mal_1',data=df)
Out[62]:
<matplotlib.axes._subplots.AxesSubplot at 0x25e52e6d8c8>
In [63]:
sns.heatmap(df.corr())
Out[63]:
<matplotlib.axes._subplots.AxesSubplot at 0x25e52edfa48>
In [66]:
df.corr()['benign_0__mal_1'].sort_values()
Out[66]:
worst concave points      -0.793566
worst perimeter           -0.782914
mean concave points       -0.776614
worst radius              -0.776454
mean perimeter            -0.742636
worst area                -0.733825
mean radius               -0.730029
mean area                 -0.708984
mean concavity            -0.696360
worst concavity           -0.659610
mean compactness          -0.596534
worst compactness         -0.590998
radius error              -0.567134
perimeter error           -0.556141
area error                -0.548236
worst texture             -0.456903
worst smoothness          -0.421465
worst symmetry            -0.416294
mean texture              -0.415185
concave points error      -0.408042
mean smoothness           -0.358560
mean symmetry             -0.330499
worst fractal dimension   -0.323872
compactness error         -0.292999
concavity error           -0.253730
fractal dimension error   -0.077972
symmetry error             0.006522
texture error              0.008303
mean fractal dimension     0.012838
smoothness error           0.067016
benign_0__mal_1            1.000000
Name: benign_0__mal_1, dtype: float64
In [68]:
df.corr()['benign_0__mal_1'].sort_values().plot(kind='bar')
Out[68]:
<matplotlib.axes._subplots.AxesSubplot at 0x25e530c9348>
In [70]:
df.corr()['benign_0__mal_1'][:-1].sort_values().plot(kind='bar')
Out[70]:
<matplotlib.axes._subplots.AxesSubplot at 0x25e532c8f08>

Train Test Split

In [73]:
X = df.drop('benign_0__mal_1',axis=1).values
y = df['benign_0__mal_1'].values
In [74]:
from sklearn.model_selection import train_test_split
In [76]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.25,random_state=101)

Scaling Data

In [77]:
from sklearn.preprocessing import MinMaxScaler
In [78]:
scaler = MinMaxScaler()
In [79]:
scaler.fit(X_train)
Out[79]:
MinMaxScaler(copy=True, feature_range=(0, 1))
In [80]:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

Creating the Model

# For a binary classification problem
model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])
In [98]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation,Dropout
In [99]:
X_train.shape
Out[99]:
(426, 30)
In [111]:
model = Sequential()

# https://stats.stackexchange.com/questions/181/how-to-choose-the-number-of-hidden-layers-and-nodes-in-a-feedforward-neural-netw

model.add(Dense(units=30,activation='relu'))

model.add(Dense(units=15,activation='relu'))


model.add(Dense(units=1,activation='sigmoid'))

# For a binary classification problem
model.compile(loss='binary_crossentropy', optimizer='adam')

Training the Model

Example One: Choosing too many epochs and overfitting!

In [112]:
# https://stats.stackexchange.com/questions/164876/tradeoff-batch-size-vs-number-of-iterations-to-train-a-neural-network
# https://datascience.stackexchange.com/questions/18414/are-there-any-rules-for-choosing-the-size-of-a-mini-batch

model.fit(x=X_train, 
          y=y_train, 
          epochs=600,
          validation_data=(X_test, y_test), verbose=1
          )
Train on 426 samples, validate on 143 samples
Epoch 1/600
426/426 [==============================] - 0s 964us/sample - loss: 0.6673 - val_loss: 0.6458
Epoch 2/600
426/426 [==============================] - 0s 102us/sample - loss: 0.6248 - val_loss: 0.6064
Epoch 3/600
426/426 [==============================] - 0s 98us/sample - loss: 0.5845 - val_loss: 0.5600
Epoch 4/600
426/426 [==============================] - 0s 101us/sample - loss: 0.5380 - val_loss: 0.5073
Epoch 5/600
426/426 [==============================] - 0s 103us/sample - loss: 0.4830 - val_loss: 0.4450
Epoch 6/600
426/426 [==============================] - 0s 97us/sample - loss: 0.4236 - val_loss: 0.3856
Epoch 7/600
426/426 [==============================] - 0s 103us/sample - loss: 0.3735 - val_loss: 0.3394
Epoch 8/600
426/426 [==============================] - 0s 99us/sample - loss: 0.3336 - val_loss: 0.3020
Epoch 9/600
426/426 [==============================] - 0s 101us/sample - loss: 0.3002 - val_loss: 0.2729
Epoch 10/600
426/426 [==============================] - 0s 102us/sample - loss: 0.2748 - val_loss: 0.2482
Epoch 11/600
426/426 [==============================] - 0s 96us/sample - loss: 0.2514 - val_loss: 0.2277
Epoch 12/600
426/426 [==============================] - 0s 99us/sample - loss: 0.2320 - val_loss: 0.2117
Epoch 13/600
426/426 [==============================] - 0s 100us/sample - loss: 0.2149 - val_loss: 0.2101
Epoch 14/600
426/426 [==============================] - 0s 103us/sample - loss: 0.2037 - val_loss: 0.1892
Epoch 15/600
426/426 [==============================] - 0s 100us/sample - loss: 0.1907 - val_loss: 0.1806
Epoch 16/600
426/426 [==============================] - 0s 101us/sample - loss: 0.1771 - val_loss: 0.1825
Epoch 17/600
426/426 [==============================] - 0s 105us/sample - loss: 0.1690 - val_loss: 0.1657
Epoch 18/600
426/426 [==============================] - 0s 103us/sample - loss: 0.1621 - val_loss: 0.1664
Epoch 19/600
426/426 [==============================] - 0s 101us/sample - loss: 0.1530 - val_loss: 0.1562
Epoch 20/600
426/426 [==============================] - 0s 102us/sample - loss: 0.1423 - val_loss: 0.1592
Epoch 21/600
426/426 [==============================] - 0s 98us/sample - loss: 0.1363 - val_loss: 0.1490
Epoch 22/600
426/426 [==============================] - 0s 99us/sample - loss: 0.1314 - val_loss: 0.1488
Epoch 23/600
426/426 [==============================] - 0s 107us/sample - loss: 0.1256 - val_loss: 0.1509
Epoch 24/600
426/426 [==============================] - 0s 100us/sample - loss: 0.1185 - val_loss: 0.1422
Epoch 25/600
426/426 [==============================] - 0s 101us/sample - loss: 0.1137 - val_loss: 0.1411
Epoch 26/600
426/426 [==============================] - 0s 101us/sample - loss: 0.1097 - val_loss: 0.1377
Epoch 27/600
426/426 [==============================] - 0s 98us/sample - loss: 0.1046 - val_loss: 0.1364
Epoch 28/600
426/426 [==============================] - 0s 103us/sample - loss: 0.1012 - val_loss: 0.1366
Epoch 29/600
426/426 [==============================] - 0s 108us/sample - loss: 0.0975 - val_loss: 0.1359
Epoch 30/600
426/426 [==============================] - 0s 108us/sample - loss: 0.0944 - val_loss: 0.1356
Epoch 31/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0908 - val_loss: 0.1316
Epoch 32/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0893 - val_loss: 0.1372
Epoch 33/600
426/426 [==============================] - 0s 102us/sample - loss: 0.0866 - val_loss: 0.1285
Epoch 34/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0844 - val_loss: 0.1328
Epoch 35/600
426/426 [==============================] - 0s 102us/sample - loss: 0.0829 - val_loss: 0.1270
Epoch 36/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0807 - val_loss: 0.1327
Epoch 37/600
426/426 [==============================] - 0s 108us/sample - loss: 0.0786 - val_loss: 0.1312
Epoch 38/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0775 - val_loss: 0.1286
Epoch 39/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0764 - val_loss: 0.1324
Epoch 40/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0816 - val_loss: 0.1247
Epoch 41/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0771 - val_loss: 0.1316
Epoch 42/600
426/426 [==============================] - 0s 105us/sample - loss: 0.0776 - val_loss: 0.1291
Epoch 43/600
426/426 [==============================] - 0s 102us/sample - loss: 0.0705 - val_loss: 0.1274
Epoch 44/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0699 - val_loss: 0.1250
Epoch 45/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0704 - val_loss: 0.1318
Epoch 46/600
426/426 [==============================] - 0s 105us/sample - loss: 0.0673 - val_loss: 0.1225
Epoch 47/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0700 - val_loss: 0.1280
Epoch 48/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0691 - val_loss: 0.1240
Epoch 49/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0674 - val_loss: 0.1252
Epoch 50/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0636 - val_loss: 0.1316
Epoch 51/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0630 - val_loss: 0.1237
Epoch 52/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0638 - val_loss: 0.1253
Epoch 53/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0632 - val_loss: 0.1246
Epoch 54/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0632 - val_loss: 0.1263
Epoch 55/600
426/426 [==============================] - 0s 112us/sample - loss: 0.0611 - val_loss: 0.1239
Epoch 56/600
426/426 [==============================] - 0s 102us/sample - loss: 0.0611 - val_loss: 0.1293
Epoch 57/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0611 - val_loss: 0.1237
Epoch 58/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0613 - val_loss: 0.1278
Epoch 59/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0587 - val_loss: 0.1239
Epoch 60/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0596 - val_loss: 0.1298
Epoch 61/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0597 - val_loss: 0.1254
Epoch 62/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0572 - val_loss: 0.1245
Epoch 63/600
426/426 [==============================] - 0s 106us/sample - loss: 0.0583 - val_loss: 0.1343
Epoch 64/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0577 - val_loss: 0.1251
Epoch 65/600
426/426 [==============================] - 0s 105us/sample - loss: 0.0560 - val_loss: 0.1301
Epoch 66/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0606 - val_loss: 0.1225
Epoch 67/600
426/426 [==============================] - 0s 104us/sample - loss: 0.0617 - val_loss: 0.1353
Epoch 68/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0567 - val_loss: 0.1250
Epoch 69/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0556 - val_loss: 0.1308
Epoch 70/600
426/426 [==============================] - 0s 100us/sample - loss: 0.0567 - val_loss: 0.1309
Epoch 71/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0547 - val_loss: 0.1306
Epoch 72/600
426/426 [==============================] - 0s 92us/sample - loss: 0.0543 - val_loss: 0.1284
Epoch 73/600
426/426 [==============================] - 0s 100us/sample - loss: 0.0548 - val_loss: 0.1339
Epoch 74/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0565 - val_loss: 0.1225
Epoch 75/600
426/426 [==============================] - 0s 104us/sample - loss: 0.0541 - val_loss: 0.1336
Epoch 76/600
426/426 [==============================] - 0s 104us/sample - loss: 0.0535 - val_loss: 0.1339
Epoch 77/600
426/426 [==============================] - 0s 95us/sample - loss: 0.0523 - val_loss: 0.1267
Epoch 78/600
426/426 [==============================] - 0s 105us/sample - loss: 0.0523 - val_loss: 0.1315
Epoch 79/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0517 - val_loss: 0.1303
Epoch 80/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0520 - val_loss: 0.1271
Epoch 81/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0534 - val_loss: 0.1277
Epoch 82/600
426/426 [==============================] - 0s 105us/sample - loss: 0.0518 - val_loss: 0.1277
Epoch 83/600
426/426 [==============================] - 0s 100us/sample - loss: 0.0512 - val_loss: 0.1321
Epoch 84/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0552 - val_loss: 0.1262
Epoch 85/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0643 - val_loss: 0.1282
Epoch 86/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0605 - val_loss: 0.1299
Epoch 87/600
426/426 [==============================] - 0s 113us/sample - loss: 0.0565 - val_loss: 0.1299
Epoch 88/600
426/426 [==============================] - 0s 112us/sample - loss: 0.0519 - val_loss: 0.1324
Epoch 89/600
426/426 [==============================] - 0s 100us/sample - loss: 0.0534 - val_loss: 0.1269
Epoch 90/600
426/426 [==============================] - 0s 100us/sample - loss: 0.0538 - val_loss: 0.1299
Epoch 91/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0497 - val_loss: 0.1299
Epoch 92/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0499 - val_loss: 0.1330
Epoch 93/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0504 - val_loss: 0.1311
Epoch 94/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0503 - val_loss: 0.1352
Epoch 95/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0498 - val_loss: 0.1351
Epoch 96/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0486 - val_loss: 0.1347
Epoch 97/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0497 - val_loss: 0.1300
Epoch 98/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0485 - val_loss: 0.1329
Epoch 99/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0478 - val_loss: 0.1362
Epoch 100/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0475 - val_loss: 0.1294
Epoch 101/600
426/426 [==============================] - 0s 105us/sample - loss: 0.0476 - val_loss: 0.1380
Epoch 102/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0498 - val_loss: 0.1297
Epoch 103/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0519 - val_loss: 0.1356
Epoch 104/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0498 - val_loss: 0.1380
Epoch 105/600
426/426 [==============================] - 0s 100us/sample - loss: 0.0466 - val_loss: 0.1304
Epoch 106/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0462 - val_loss: 0.1409
Epoch 107/600
426/426 [==============================] - 0s 93us/sample - loss: 0.0470 - val_loss: 0.1320
Epoch 108/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0467 - val_loss: 0.1341
Epoch 109/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0475 - val_loss: 0.1392
Epoch 110/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0498 - val_loss: 0.1330
Epoch 111/600
426/426 [==============================] - 0s 104us/sample - loss: 0.0479 - val_loss: 0.1369
Epoch 112/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0490 - val_loss: 0.1372
Epoch 113/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0455 - val_loss: 0.1348
Epoch 114/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0456 - val_loss: 0.1321
Epoch 115/600
426/426 [==============================] - 0s 105us/sample - loss: 0.0460 - val_loss: 0.1313
Epoch 116/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0458 - val_loss: 0.1320
Epoch 117/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0451 - val_loss: 0.1341
Epoch 118/600
426/426 [==============================] - 0s 102us/sample - loss: 0.0453 - val_loss: 0.1344
Epoch 119/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0450 - val_loss: 0.1341
Epoch 120/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0460 - val_loss: 0.1333
Epoch 121/600
426/426 [==============================] - 0s 107us/sample - loss: 0.0457 - val_loss: 0.1310
Epoch 122/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0443 - val_loss: 0.1332
Epoch 123/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0460 - val_loss: 0.1421
Epoch 124/600
426/426 [==============================] - 0s 102us/sample - loss: 0.0498 - val_loss: 0.1367
Epoch 125/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0432 - val_loss: 0.1312
Epoch 126/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0440 - val_loss: 0.1410
Epoch 127/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0428 - val_loss: 0.1315
Epoch 128/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0451 - val_loss: 0.1460
Epoch 129/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0456 - val_loss: 0.1310
Epoch 130/600
426/426 [==============================] - 0s 93us/sample - loss: 0.0432 - val_loss: 0.1408
Epoch 131/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0454 - val_loss: 0.1395
Epoch 132/600
426/426 [==============================] - 0s 105us/sample - loss: 0.0500 - val_loss: 0.1322
Epoch 133/600
426/426 [==============================] - 0s 93us/sample - loss: 0.0456 - val_loss: 0.1480
Epoch 134/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0437 - val_loss: 0.1339
Epoch 135/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0445 - val_loss: 0.1415
Epoch 136/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0424 - val_loss: 0.1360
Epoch 137/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0416 - val_loss: 0.1380
Epoch 138/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0439 - val_loss: 0.1328
Epoch 139/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0417 - val_loss: 0.1475
Epoch 140/600
426/426 [==============================] - 0s 100us/sample - loss: 0.0462 - val_loss: 0.1382
Epoch 141/600
426/426 [==============================] - 0s 93us/sample - loss: 0.0434 - val_loss: 0.1356
Epoch 142/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0421 - val_loss: 0.1381
Epoch 143/600
426/426 [==============================] - 0s 91us/sample - loss: 0.0436 - val_loss: 0.1445
Epoch 144/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0416 - val_loss: 0.1435
Epoch 145/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0415 - val_loss: 0.1375
Epoch 146/600
426/426 [==============================] - 0s 105us/sample - loss: 0.0413 - val_loss: 0.1398
Epoch 147/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0409 - val_loss: 0.1445
Epoch 148/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0418 - val_loss: 0.1514
Epoch 149/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0407 - val_loss: 0.1379
Epoch 150/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0420 - val_loss: 0.1391
Epoch 151/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0457 - val_loss: 0.1364
Epoch 152/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0436 - val_loss: 0.1399
Epoch 153/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0511 - val_loss: 0.1506
Epoch 154/600
426/426 [==============================] - 0s 91us/sample - loss: 0.0421 - val_loss: 0.1389
Epoch 155/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0399 - val_loss: 0.1443
Epoch 156/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0407 - val_loss: 0.1392
Epoch 157/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0396 - val_loss: 0.1438
Epoch 158/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0405 - val_loss: 0.1420
Epoch 159/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0415 - val_loss: 0.1415
Epoch 160/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0392 - val_loss: 0.1436
Epoch 161/600
426/426 [==============================] - 0s 93us/sample - loss: 0.0396 - val_loss: 0.1379
Epoch 162/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0402 - val_loss: 0.1473
Epoch 163/600
426/426 [==============================] - 0s 91us/sample - loss: 0.0414 - val_loss: 0.1449
Epoch 164/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0442 - val_loss: 0.1432
Epoch 165/600
426/426 [==============================] - 0s 91us/sample - loss: 0.0382 - val_loss: 0.1469
Epoch 166/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0423 - val_loss: 0.1368
Epoch 167/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0408 - val_loss: 0.1430
Epoch 168/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0423 - val_loss: 0.1454
Epoch 169/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0513 - val_loss: 0.1596
Epoch 170/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0412 - val_loss: 0.1386
Epoch 171/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0396 - val_loss: 0.1535
Epoch 172/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0401 - val_loss: 0.1374
Epoch 173/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0351 - val_loss: 0.1625
Epoch 174/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0408 - val_loss: 0.1379
Epoch 175/600
426/426 [==============================] - 0s 95us/sample - loss: 0.0386 - val_loss: 0.1483
Epoch 176/600
426/426 [==============================] - 0s 95us/sample - loss: 0.0406 - val_loss: 0.1434
Epoch 177/600
426/426 [==============================] - 0s 95us/sample - loss: 0.0376 - val_loss: 0.1432
Epoch 178/600
426/426 [==============================] - 0s 90us/sample - loss: 0.0372 - val_loss: 0.1443
Epoch 179/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0366 - val_loss: 0.1419
Epoch 180/600
426/426 [==============================] - 0s 95us/sample - loss: 0.0366 - val_loss: 0.1459
Epoch 181/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0451 - val_loss: 0.1406
Epoch 182/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0416 - val_loss: 0.1570
Epoch 183/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0418 - val_loss: 0.1458
Epoch 184/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0412 - val_loss: 0.1423
Epoch 185/600
426/426 [==============================] - 0s 92us/sample - loss: 0.0377 - val_loss: 0.1433
Epoch 186/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0358 - val_loss: 0.1525
Epoch 187/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0402 - val_loss: 0.1502
Epoch 188/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0379 - val_loss: 0.1486
Epoch 189/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0379 - val_loss: 0.1440
Epoch 190/600
426/426 [==============================] - 0s 91us/sample - loss: 0.0390 - val_loss: 0.1431
Epoch 191/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0421 - val_loss: 0.1613
Epoch 192/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0374 - val_loss: 0.1401
Epoch 193/600
426/426 [==============================] - 0s 93us/sample - loss: 0.0363 - val_loss: 0.1562
Epoch 194/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0363 - val_loss: 0.1449
Epoch 195/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0380 - val_loss: 0.1410
Epoch 196/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0420 - val_loss: 0.1639
Epoch 197/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0412 - val_loss: 0.1407
Epoch 198/600
426/426 [==============================] - 0s 95us/sample - loss: 0.0367 - val_loss: 0.1416
Epoch 199/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0384 - val_loss: 0.1485
Epoch 200/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0346 - val_loss: 0.1422
Epoch 201/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0349 - val_loss: 0.1519
Epoch 202/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0369 - val_loss: 0.1415
Epoch 203/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0354 - val_loss: 0.1503
Epoch 204/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0345 - val_loss: 0.1458
Epoch 205/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0350 - val_loss: 0.1479
Epoch 206/600
426/426 [==============================] - 0s 91us/sample - loss: 0.0344 - val_loss: 0.1448
Epoch 207/600
426/426 [==============================] - 0s 95us/sample - loss: 0.0351 - val_loss: 0.1492
Epoch 208/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0346 - val_loss: 0.1490
Epoch 209/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0360 - val_loss: 0.1429
Epoch 210/600
426/426 [==============================] - 0s 95us/sample - loss: 0.0375 - val_loss: 0.1582
Epoch 211/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0367 - val_loss: 0.1475
Epoch 212/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0358 - val_loss: 0.1653
Epoch 213/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0367 - val_loss: 0.1441
Epoch 214/600
426/426 [==============================] - 0s 92us/sample - loss: 0.0355 - val_loss: 0.1547
Epoch 215/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0332 - val_loss: 0.1495
Epoch 216/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0341 - val_loss: 0.1525
Epoch 217/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0440 - val_loss: 0.1626
Epoch 218/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0347 - val_loss: 0.1480
Epoch 219/600
426/426 [==============================] - 0s 91us/sample - loss: 0.0335 - val_loss: 0.1590
Epoch 220/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0340 - val_loss: 0.1510
Epoch 221/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0327 - val_loss: 0.1533
Epoch 222/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0331 - val_loss: 0.1521
Epoch 223/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0330 - val_loss: 0.1458
Epoch 224/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0344 - val_loss: 0.1552
Epoch 225/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0363 - val_loss: 0.1480
Epoch 226/600
426/426 [==============================] - 0s 117us/sample - loss: 0.0345 - val_loss: 0.1536
Epoch 227/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0368 - val_loss: 0.1534
Epoch 228/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0333 - val_loss: 0.1475
Epoch 229/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0330 - val_loss: 0.1561
Epoch 230/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0323 - val_loss: 0.1467
Epoch 231/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0325 - val_loss: 0.1627
Epoch 232/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0400 - val_loss: 0.1484
Epoch 233/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0350 - val_loss: 0.1521
Epoch 234/600
426/426 [==============================] - 0s 93us/sample - loss: 0.0369 - val_loss: 0.1465
Epoch 235/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0326 - val_loss: 0.1488
Epoch 236/600
426/426 [==============================] - 0s 95us/sample - loss: 0.0316 - val_loss: 0.1583
Epoch 237/600
426/426 [==============================] - 0s 93us/sample - loss: 0.0313 - val_loss: 0.1491
Epoch 238/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0325 - val_loss: 0.1653
Epoch 239/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0336 - val_loss: 0.1483
Epoch 240/600
426/426 [==============================] - 0s 92us/sample - loss: 0.0392 - val_loss: 0.1622
Epoch 241/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0355 - val_loss: 0.1462
Epoch 242/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0342 - val_loss: 0.1559
Epoch 243/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0326 - val_loss: 0.1524
Epoch 244/600
426/426 [==============================] - 0s 93us/sample - loss: 0.0337 - val_loss: 0.1512
Epoch 245/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0311 - val_loss: 0.1555
Epoch 246/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0310 - val_loss: 0.1544
Epoch 247/600
426/426 [==============================] - 0s 93us/sample - loss: 0.0316 - val_loss: 0.1527
Epoch 248/600
426/426 [==============================] - 0s 93us/sample - loss: 0.0332 - val_loss: 0.1562
Epoch 249/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0341 - val_loss: 0.1558
Epoch 250/600
426/426 [==============================] - 0s 95us/sample - loss: 0.0348 - val_loss: 0.1573
Epoch 251/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0368 - val_loss: 0.1557
Epoch 252/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0365 - val_loss: 0.1510
Epoch 253/600
426/426 [==============================] - 0s 108us/sample - loss: 0.0374 - val_loss: 0.1639
Epoch 254/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0398 - val_loss: 0.1512
Epoch 255/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0348 - val_loss: 0.1670
Epoch 256/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0319 - val_loss: 0.1571
Epoch 257/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0302 - val_loss: 0.1527
Epoch 258/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0306 - val_loss: 0.1583
Epoch 259/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0295 - val_loss: 0.1627
Epoch 260/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0303 - val_loss: 0.1566
Epoch 261/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0324 - val_loss: 0.1585
Epoch 262/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0358 - val_loss: 0.1521
Epoch 263/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0303 - val_loss: 0.1558
Epoch 264/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0294 - val_loss: 0.1623
Epoch 265/600
426/426 [==============================] - 0s 102us/sample - loss: 0.0304 - val_loss: 0.1537
Epoch 266/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0286 - val_loss: 0.1610
Epoch 267/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0325 - val_loss: 0.1593
Epoch 268/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0290 - val_loss: 0.1614
Epoch 269/600
426/426 [==============================] - 0s 108us/sample - loss: 0.0302 - val_loss: 0.1569
Epoch 270/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0302 - val_loss: 0.1606
Epoch 271/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0299 - val_loss: 0.1534
Epoch 272/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0323 - val_loss: 0.1685
Epoch 273/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0324 - val_loss: 0.1547
Epoch 274/600
426/426 [==============================] - 0s 102us/sample - loss: 0.0307 - val_loss: 0.1591
Epoch 275/600
426/426 [==============================] - 0s 100us/sample - loss: 0.0300 - val_loss: 0.1568
Epoch 276/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0333 - val_loss: 0.1598
Epoch 277/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0345 - val_loss: 0.1603
Epoch 278/600
426/426 [==============================] - 0s 102us/sample - loss: 0.0311 - val_loss: 0.1548
Epoch 279/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0297 - val_loss: 0.1615
Epoch 280/600
426/426 [==============================] - 0s 95us/sample - loss: 0.0299 - val_loss: 0.1665
Epoch 281/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0346 - val_loss: 0.1578
Epoch 282/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0314 - val_loss: 0.1679
Epoch 283/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0296 - val_loss: 0.1588
Epoch 284/600
426/426 [==============================] - 0s 112us/sample - loss: 0.0296 - val_loss: 0.1658
Epoch 285/600
426/426 [==============================] - 0s 100us/sample - loss: 0.0298 - val_loss: 0.1585
Epoch 286/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0298 - val_loss: 0.1668
Epoch 287/600
426/426 [==============================] - 0s 105us/sample - loss: 0.0299 - val_loss: 0.1600
Epoch 288/600
426/426 [==============================] - 0s 105us/sample - loss: 0.0291 - val_loss: 0.1587
Epoch 289/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0289 - val_loss: 0.1629
Epoch 290/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0295 - val_loss: 0.1615
Epoch 291/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0280 - val_loss: 0.1659
Epoch 292/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0271 - val_loss: 0.1610
Epoch 293/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0277 - val_loss: 0.1604
Epoch 294/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0317 - val_loss: 0.1742
Epoch 295/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0280 - val_loss: 0.1588
Epoch 296/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0319 - val_loss: 0.1804
Epoch 297/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0304 - val_loss: 0.1597
Epoch 298/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0285 - val_loss: 0.1638
Epoch 299/600
426/426 [==============================] - 0s 100us/sample - loss: 0.0311 - val_loss: 0.1644
Epoch 300/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0281 - val_loss: 0.1681
Epoch 301/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0309 - val_loss: 0.1644
Epoch 302/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0273 - val_loss: 0.1624
Epoch 303/600
426/426 [==============================] - 0s 102us/sample - loss: 0.0280 - val_loss: 0.1635
Epoch 304/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0282 - val_loss: 0.1725
Epoch 305/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0270 - val_loss: 0.1660
Epoch 306/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0266 - val_loss: 0.1724
Epoch 307/600
426/426 [==============================] - 0s 102us/sample - loss: 0.0351 - val_loss: 0.1800
Epoch 308/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0316 - val_loss: 0.1581
Epoch 309/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0409 - val_loss: 0.1652
Epoch 310/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0312 - val_loss: 0.1590
Epoch 311/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0291 - val_loss: 0.1667
Epoch 312/600
426/426 [==============================] - 0s 92us/sample - loss: 0.0268 - val_loss: 0.1658
Epoch 313/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0269 - val_loss: 0.1647
Epoch 314/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0267 - val_loss: 0.1750
Epoch 315/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0289 - val_loss: 0.1645
Epoch 316/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0263 - val_loss: 0.1651
Epoch 317/600
426/426 [==============================] - 0s 95us/sample - loss: 0.0271 - val_loss: 0.1685
Epoch 318/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0259 - val_loss: 0.1666
Epoch 319/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0308 - val_loss: 0.1856
Epoch 320/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0316 - val_loss: 0.1647
Epoch 321/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0253 - val_loss: 0.1699
Epoch 322/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0257 - val_loss: 0.1686
Epoch 323/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0282 - val_loss: 0.1691
Epoch 324/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0262 - val_loss: 0.1705
Epoch 325/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0256 - val_loss: 0.1700
Epoch 326/600
426/426 [==============================] - 0s 102us/sample - loss: 0.0263 - val_loss: 0.1746
Epoch 327/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0262 - val_loss: 0.1723
Epoch 328/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0291 - val_loss: 0.1656
Epoch 329/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0263 - val_loss: 0.1727
Epoch 330/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0254 - val_loss: 0.1639
Epoch 331/600
426/426 [==============================] - 0s 104us/sample - loss: 0.0267 - val_loss: 0.1698
Epoch 332/600
426/426 [==============================] - 0s 100us/sample - loss: 0.0261 - val_loss: 0.1683
Epoch 333/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0245 - val_loss: 0.1697
Epoch 334/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0244 - val_loss: 0.1737
Epoch 335/600
426/426 [==============================] - 0s 102us/sample - loss: 0.0280 - val_loss: 0.1743
Epoch 336/600
426/426 [==============================] - 0s 110us/sample - loss: 0.0252 - val_loss: 0.1680
Epoch 337/600
426/426 [==============================] - 0s 110us/sample - loss: 0.0254 - val_loss: 0.1666
Epoch 338/600
426/426 [==============================] - 0s 112us/sample - loss: 0.0277 - val_loss: 0.1650
Epoch 339/600
426/426 [==============================] - 0s 105us/sample - loss: 0.0240 - val_loss: 0.1736
Epoch 340/600
426/426 [==============================] - 0s 106us/sample - loss: 0.0254 - val_loss: 0.1687
Epoch 341/600
426/426 [==============================] - 0s 110us/sample - loss: 0.0236 - val_loss: 0.1762
Epoch 342/600
426/426 [==============================] - 0s 105us/sample - loss: 0.0265 - val_loss: 0.1681
Epoch 343/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0243 - val_loss: 0.1822
Epoch 344/600
426/426 [==============================] - 0s 110us/sample - loss: 0.0271 - val_loss: 0.1765
Epoch 345/600
426/426 [==============================] - 0s 108us/sample - loss: 0.0227 - val_loss: 0.1669
Epoch 346/600
426/426 [==============================] - 0s 107us/sample - loss: 0.0243 - val_loss: 0.1769
Epoch 347/600
426/426 [==============================] - 0s 115us/sample - loss: 0.0238 - val_loss: 0.1697
Epoch 348/600
426/426 [==============================] - 0s 108us/sample - loss: 0.0237 - val_loss: 0.1790
Epoch 349/600
426/426 [==============================] - 0s 108us/sample - loss: 0.0246 - val_loss: 0.1717
Epoch 350/600
426/426 [==============================] - 0s 91us/sample - loss: 0.0224 - val_loss: 0.1736
Epoch 351/600
426/426 [==============================] - 0s 102us/sample - loss: 0.0228 - val_loss: 0.1732
Epoch 352/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0221 - val_loss: 0.1785
Epoch 353/600
426/426 [==============================] - 0s 91us/sample - loss: 0.0262 - val_loss: 0.1692
Epoch 354/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0259 - val_loss: 0.1663
Epoch 355/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0247 - val_loss: 0.1713
Epoch 356/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0225 - val_loss: 0.1732
Epoch 357/600
426/426 [==============================] - 0s 91us/sample - loss: 0.0220 - val_loss: 0.1781
Epoch 358/600
426/426 [==============================] - 0s 107us/sample - loss: 0.0262 - val_loss: 0.1703
Epoch 359/600
426/426 [==============================] - 0s 92us/sample - loss: 0.0217 - val_loss: 0.1794
Epoch 360/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0223 - val_loss: 0.1749
Epoch 361/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0229 - val_loss: 0.1825
Epoch 362/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0208 - val_loss: 0.1713
Epoch 363/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0231 - val_loss: 0.1762
Epoch 364/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0278 - val_loss: 0.1890
Epoch 365/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0268 - val_loss: 0.1737
Epoch 366/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0211 - val_loss: 0.1864
Epoch 367/600
426/426 [==============================] - 0s 95us/sample - loss: 0.0216 - val_loss: 0.1728
Epoch 368/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0239 - val_loss: 0.1798
Epoch 369/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0228 - val_loss: 0.1750
Epoch 370/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0200 - val_loss: 0.1876
Epoch 371/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0211 - val_loss: 0.1779
Epoch 372/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0215 - val_loss: 0.1761
Epoch 373/600
426/426 [==============================] - 0s 93us/sample - loss: 0.0232 - val_loss: 0.1785
Epoch 374/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0211 - val_loss: 0.1877
Epoch 375/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0274 - val_loss: 0.1735
Epoch 376/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0226 - val_loss: 0.2063
Epoch 377/600
426/426 [==============================] - 0s 95us/sample - loss: 0.0259 - val_loss: 0.1783
Epoch 378/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0241 - val_loss: 0.1969
Epoch 379/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0206 - val_loss: 0.1790
Epoch 380/600
426/426 [==============================] - 0s 95us/sample - loss: 0.0237 - val_loss: 0.1883
Epoch 381/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0283 - val_loss: 0.1962
Epoch 382/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0208 - val_loss: 0.1871
Epoch 383/600
426/426 [==============================] - 0s 91us/sample - loss: 0.0202 - val_loss: 0.1858
Epoch 384/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0226 - val_loss: 0.1875
Epoch 385/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0227 - val_loss: 0.1905
Epoch 386/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0226 - val_loss: 0.1896
Epoch 387/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0264 - val_loss: 0.1810
Epoch 388/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0252 - val_loss: 0.1950
Epoch 389/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0231 - val_loss: 0.1759
Epoch 390/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0225 - val_loss: 0.1878
Epoch 391/600
426/426 [==============================] - 0s 100us/sample - loss: 0.0198 - val_loss: 0.1865
Epoch 392/600
426/426 [==============================] - 0s 95us/sample - loss: 0.0222 - val_loss: 0.1874
Epoch 393/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0244 - val_loss: 0.2052
Epoch 394/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0227 - val_loss: 0.1817
Epoch 395/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0250 - val_loss: 0.1920
Epoch 396/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0202 - val_loss: 0.1849
Epoch 397/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0204 - val_loss: 0.1876
Epoch 398/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0211 - val_loss: 0.1886
Epoch 399/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0207 - val_loss: 0.1881
Epoch 400/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0236 - val_loss: 0.2030
Epoch 401/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0247 - val_loss: 0.1862
Epoch 402/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0206 - val_loss: 0.1904
Epoch 403/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0200 - val_loss: 0.1892
Epoch 404/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0200 - val_loss: 0.2006
Epoch 405/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0205 - val_loss: 0.1897
Epoch 406/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0201 - val_loss: 0.1916
Epoch 407/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0187 - val_loss: 0.1949
Epoch 408/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0190 - val_loss: 0.1906
Epoch 409/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0184 - val_loss: 0.2064
Epoch 410/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0211 - val_loss: 0.1886
Epoch 411/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0190 - val_loss: 0.2037
Epoch 412/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0203 - val_loss: 0.1904
Epoch 413/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0188 - val_loss: 0.1972
Epoch 414/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0183 - val_loss: 0.1915
Epoch 415/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0189 - val_loss: 0.2010
Epoch 416/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0203 - val_loss: 0.1906
Epoch 417/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0257 - val_loss: 0.2045
Epoch 418/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0296 - val_loss: 0.1881
Epoch 419/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0225 - val_loss: 0.2040
Epoch 420/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0196 - val_loss: 0.1953
Epoch 421/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0193 - val_loss: 0.2070
Epoch 422/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0194 - val_loss: 0.1972
Epoch 423/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0239 - val_loss: 0.2093
Epoch 424/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0240 - val_loss: 0.1922
Epoch 425/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0231 - val_loss: 0.1969
Epoch 426/600
426/426 [==============================] - 0s 100us/sample - loss: 0.0194 - val_loss: 0.1991
Epoch 427/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0177 - val_loss: 0.2021
Epoch 428/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0183 - val_loss: 0.2037
Epoch 429/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0193 - val_loss: 0.1944
Epoch 430/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0194 - val_loss: 0.2162
Epoch 431/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0164 - val_loss: 0.1949
Epoch 432/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0193 - val_loss: 0.2004
Epoch 433/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0169 - val_loss: 0.1973
Epoch 434/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0217 - val_loss: 0.2090
Epoch 435/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0219 - val_loss: 0.1976
Epoch 436/600
426/426 [==============================] - 0s 105us/sample - loss: 0.0194 - val_loss: 0.2098
Epoch 437/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0168 - val_loss: 0.2021
Epoch 438/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0172 - val_loss: 0.2118
Epoch 439/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0182 - val_loss: 0.2113
Epoch 440/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0221 - val_loss: 0.2023
Epoch 441/600
426/426 [==============================] - 0s 102us/sample - loss: 0.0194 - val_loss: 0.2199
Epoch 442/600
426/426 [==============================] - 0s 100us/sample - loss: 0.0163 - val_loss: 0.2011
Epoch 443/600
426/426 [==============================] - 0s 95us/sample - loss: 0.0166 - val_loss: 0.2101
Epoch 444/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0165 - val_loss: 0.2067
Epoch 445/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0164 - val_loss: 0.2075
Epoch 446/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0187 - val_loss: 0.2109
Epoch 447/600
426/426 [==============================] - 0s 104us/sample - loss: 0.0154 - val_loss: 0.2033
Epoch 448/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0261 - val_loss: 0.2370
Epoch 449/600
426/426 [==============================] - 0s 100us/sample - loss: 0.0260 - val_loss: 0.2038
Epoch 450/600
426/426 [==============================] - 0s 102us/sample - loss: 0.0193 - val_loss: 0.2124
Epoch 451/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0162 - val_loss: 0.2102
Epoch 452/600
426/426 [==============================] - 0s 105us/sample - loss: 0.0165 - val_loss: 0.2126
Epoch 453/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0224 - val_loss: 0.2130
Epoch 454/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0166 - val_loss: 0.2243
Epoch 455/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0160 - val_loss: 0.2127
Epoch 456/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0155 - val_loss: 0.2200
Epoch 457/600
426/426 [==============================] - 0s 108us/sample - loss: 0.0159 - val_loss: 0.2233
Epoch 458/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0148 - val_loss: 0.2123
Epoch 459/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0151 - val_loss: 0.2156
Epoch 460/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0160 - val_loss: 0.2224
Epoch 461/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0166 - val_loss: 0.2143
Epoch 462/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0157 - val_loss: 0.2166
Epoch 463/600
426/426 [==============================] - 0s 95us/sample - loss: 0.0159 - val_loss: 0.2318
Epoch 464/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0158 - val_loss: 0.2159
Epoch 465/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0150 - val_loss: 0.2128
Epoch 466/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0188 - val_loss: 0.2280
Epoch 467/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0148 - val_loss: 0.2164
Epoch 468/600
426/426 [==============================] - 0s 105us/sample - loss: 0.0178 - val_loss: 0.2127
Epoch 469/600
426/426 [==============================] - 0s 102us/sample - loss: 0.0177 - val_loss: 0.2232
Epoch 470/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0149 - val_loss: 0.2276
Epoch 471/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0147 - val_loss: 0.2184
Epoch 472/600
426/426 [==============================] - 0s 92us/sample - loss: 0.0220 - val_loss: 0.2294
Epoch 473/600
426/426 [==============================] - 0s 102us/sample - loss: 0.0212 - val_loss: 0.2259
Epoch 474/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0209 - val_loss: 0.2275
Epoch 475/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0172 - val_loss: 0.2314
Epoch 476/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0160 - val_loss: 0.2173
Epoch 477/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0146 - val_loss: 0.2314
Epoch 478/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0151 - val_loss: 0.2286
Epoch 479/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0152 - val_loss: 0.2287
Epoch 480/600
426/426 [==============================] - 0s 106us/sample - loss: 0.0154 - val_loss: 0.2240
Epoch 481/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0147 - val_loss: 0.2266
Epoch 482/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0163 - val_loss: 0.2239
Epoch 483/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0158 - val_loss: 0.2335
Epoch 484/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0148 - val_loss: 0.2239
Epoch 485/600
426/426 [==============================] - 0s 105us/sample - loss: 0.0158 - val_loss: 0.2303
Epoch 486/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0152 - val_loss: 0.2305
Epoch 487/600
426/426 [==============================] - 0s 105us/sample - loss: 0.0142 - val_loss: 0.2315
Epoch 488/600
426/426 [==============================] - 0s 100us/sample - loss: 0.0154 - val_loss: 0.2356
Epoch 489/600
426/426 [==============================] - 0s 100us/sample - loss: 0.0144 - val_loss: 0.2294
Epoch 490/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0157 - val_loss: 0.2321
Epoch 491/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0156 - val_loss: 0.2358
Epoch 492/600
426/426 [==============================] - 0s 105us/sample - loss: 0.0137 - val_loss: 0.2288
Epoch 493/600
426/426 [==============================] - 0s 100us/sample - loss: 0.0156 - val_loss: 0.2278
Epoch 494/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0182 - val_loss: 0.2319
Epoch 495/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0167 - val_loss: 0.2367
Epoch 496/600
426/426 [==============================] - 0s 105us/sample - loss: 0.0168 - val_loss: 0.2273
Epoch 497/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0152 - val_loss: 0.2356
Epoch 498/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0150 - val_loss: 0.2311
Epoch 499/600
426/426 [==============================] - 0s 102us/sample - loss: 0.0145 - val_loss: 0.2311
Epoch 500/600
426/426 [==============================] - ETA: 0s - loss: 0.001 - 0s 95us/sample - loss: 0.0248 - val_loss: 0.2682
Epoch 501/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0202 - val_loss: 0.2251
Epoch 502/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0136 - val_loss: 0.2503
Epoch 503/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0139 - val_loss: 0.2321
Epoch 504/600
426/426 [==============================] - 0s 95us/sample - loss: 0.0134 - val_loss: 0.2402
Epoch 505/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0134 - val_loss: 0.2322
Epoch 506/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0208 - val_loss: 0.2556
Epoch 507/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0186 - val_loss: 0.2337
Epoch 508/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0146 - val_loss: 0.2401
Epoch 509/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0144 - val_loss: 0.2367
Epoch 510/600
426/426 [==============================] - 0s 104us/sample - loss: 0.0151 - val_loss: 0.2460
Epoch 511/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0137 - val_loss: 0.2603
Epoch 512/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0176 - val_loss: 0.2369
Epoch 513/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0143 - val_loss: 0.2326
Epoch 514/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0187 - val_loss: 0.2619
Epoch 515/600
426/426 [==============================] - 0s 100us/sample - loss: 0.0249 - val_loss: 0.2288
Epoch 516/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0187 - val_loss: 0.2371
Epoch 517/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0157 - val_loss: 0.2510
Epoch 518/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0158 - val_loss: 0.2402
Epoch 519/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0134 - val_loss: 0.2503
Epoch 520/600
426/426 [==============================] - 0s 95us/sample - loss: 0.0126 - val_loss: 0.2515
Epoch 521/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0130 - val_loss: 0.2468
Epoch 522/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0127 - val_loss: 0.2519
Epoch 523/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0120 - val_loss: 0.2438
Epoch 524/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0144 - val_loss: 0.2615
Epoch 525/600
426/426 [==============================] - 0s 106us/sample - loss: 0.0138 - val_loss: 0.2419
Epoch 526/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0125 - val_loss: 0.2465
Epoch 527/600
426/426 [==============================] - 0s 95us/sample - loss: 0.0125 - val_loss: 0.2420
Epoch 528/600
426/426 [==============================] - 0s 100us/sample - loss: 0.0185 - val_loss: 0.2731
Epoch 529/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0167 - val_loss: 0.2326
Epoch 530/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0193 - val_loss: 0.2584
Epoch 531/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0170 - val_loss: 0.2426
Epoch 532/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0141 - val_loss: 0.2471
Epoch 533/600
426/426 [==============================] - 0s 123us/sample - loss: 0.0132 - val_loss: 0.2534
Epoch 534/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0127 - val_loss: 0.2470
Epoch 535/600
426/426 [==============================] - 0s 105us/sample - loss: 0.0142 - val_loss: 0.2649
Epoch 536/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0144 - val_loss: 0.2395
Epoch 537/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0155 - val_loss: 0.2527
Epoch 538/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0125 - val_loss: 0.2658
Epoch 539/600
426/426 [==============================] - 0s 106us/sample - loss: 0.0140 - val_loss: 0.2463
Epoch 540/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0126 - val_loss: 0.2521
Epoch 541/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0116 - val_loss: 0.2455
Epoch 542/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0132 - val_loss: 0.2970
Epoch 543/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0172 - val_loss: 0.2415
Epoch 544/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0183 - val_loss: 0.2610
Epoch 545/600
426/426 [==============================] - 0s 108us/sample - loss: 0.0125 - val_loss: 0.2543
Epoch 546/600
426/426 [==============================] - 0s 95us/sample - loss: 0.0163 - val_loss: 0.2473
Epoch 547/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0139 - val_loss: 0.2642
Epoch 548/600
426/426 [==============================] - 0s 102us/sample - loss: 0.0134 - val_loss: 0.2489
Epoch 549/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0121 - val_loss: 0.2577
Epoch 550/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0118 - val_loss: 0.2750
Epoch 551/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0123 - val_loss: 0.2492
Epoch 552/600
426/426 [==============================] - 0s 107us/sample - loss: 0.0132 - val_loss: 0.2537
Epoch 553/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0116 - val_loss: 0.2546
Epoch 554/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0112 - val_loss: 0.2696
Epoch 555/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0119 - val_loss: 0.2549
Epoch 556/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0110 - val_loss: 0.2563
Epoch 557/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0122 - val_loss: 0.2533
Epoch 558/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0127 - val_loss: 0.2654
Epoch 559/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0105 - val_loss: 0.2603
Epoch 560/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0114 - val_loss: 0.2596
Epoch 561/600
426/426 [==============================] - 0s 108us/sample - loss: 0.0115 - val_loss: 0.2538
Epoch 562/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0134 - val_loss: 0.2586
Epoch 563/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0110 - val_loss: 0.2628
Epoch 564/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0103 - val_loss: 0.2613
Epoch 565/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0105 - val_loss: 0.2593
Epoch 566/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0103 - val_loss: 0.2812
Epoch 567/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0131 - val_loss: 0.2635
Epoch 568/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0112 - val_loss: 0.2627
Epoch 569/600
426/426 [==============================] - 0s 108us/sample - loss: 0.0106 - val_loss: 0.2714
Epoch 570/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0109 - val_loss: 0.2780
Epoch 571/600
426/426 [==============================] - 0s 113us/sample - loss: 0.0107 - val_loss: 0.2746
Epoch 572/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0113 - val_loss: 0.2629
Epoch 573/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0110 - val_loss: 0.2797
Epoch 574/600
426/426 [==============================] - 0s 105us/sample - loss: 0.0101 - val_loss: 0.2651
Epoch 575/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0111 - val_loss: 0.2836
Epoch 576/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0100 - val_loss: 0.2573
Epoch 577/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0214 - val_loss: 0.2990
Epoch 578/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0160 - val_loss: 0.2638
Epoch 579/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0098 - val_loss: 0.2789
Epoch 580/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0102 - val_loss: 0.2774
Epoch 581/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0098 - val_loss: 0.2707
Epoch 582/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0107 - val_loss: 0.2872
Epoch 583/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0112 - val_loss: 0.2641
Epoch 584/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0100 - val_loss: 0.2766
Epoch 585/600
426/426 [==============================] - 0s 97us/sample - loss: 0.0101 - val_loss: 0.2658
Epoch 586/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0118 - val_loss: 0.2884
Epoch 587/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0104 - val_loss: 0.2694
Epoch 588/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0132 - val_loss: 0.2666
Epoch 589/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0102 - val_loss: 0.3018
Epoch 590/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0105 - val_loss: 0.2692
Epoch 591/600
426/426 [==============================] - 0s 112us/sample - loss: 0.0107 - val_loss: 0.2759
Epoch 592/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0119 - val_loss: 0.2892
Epoch 593/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0092 - val_loss: 0.2654
Epoch 594/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0110 - val_loss: 0.3104
Epoch 595/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0140 - val_loss: 0.2724
Epoch 596/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0104 - val_loss: 0.3031
Epoch 597/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0117 - val_loss: 0.2722
Epoch 598/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0101 - val_loss: 0.2829
Epoch 599/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0103 - val_loss: 0.2690
Epoch 600/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0103 - val_loss: 0.3006
Out[112]:
<tensorflow.python.keras.callbacks.History at 0x260b0653c48>
In [113]:
# model.history.history
In [114]:
model_loss = pd.DataFrame(model.history.history)
In [115]:
# model_loss
In [116]:
model_loss.plot()
Out[116]:
<matplotlib.axes._subplots.AxesSubplot at 0x260b1e4ff48>

Example Two: Early Stopping

We obviously trained too much! Let's use early stopping to track the val_loss and stop training once it begins increasing too much!

In [117]:
model = Sequential()
model.add(Dense(units=30,activation='relu'))
model.add(Dense(units=15,activation='relu'))
model.add(Dense(units=1,activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam')
In [119]:
from tensorflow.keras.callbacks import EarlyStopping

Stop training when a monitored quantity has stopped improving.

Arguments:
    monitor: Quantity to be monitored.
    min_delta: Minimum change in the monitored quantity
        to qualify as an improvement, i.e. an absolute
        change of less than min_delta, will count as no
        improvement.
    patience: Number of epochs with no improvement
        after which training will be stopped.
    verbose: verbosity mode.
    mode: One of `{"auto", "min", "max"}`. In `min` mode,
        training will stop when the quantity
        monitored has stopped decreasing; in `max`
        mode it will stop when the quantity
        monitored has stopped increasing; in `auto`
        mode, the direction is automatically inferred
        from the name of the monitored quantity.
In [121]:
early_stop = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=25)
In [122]:
model.fit(x=X_train, 
          y=y_train, 
          epochs=600,
          validation_data=(X_test, y_test), verbose=1,
          callbacks=[early_stop]
          )
Train on 426 samples, validate on 143 samples
Epoch 1/600
426/426 [==============================] - 0s 967us/sample - loss: 0.6927 - val_loss: 0.6778
Epoch 2/600
426/426 [==============================] - 0s 103us/sample - loss: 0.6653 - val_loss: 0.6505
Epoch 3/600
426/426 [==============================] - 0s 104us/sample - loss: 0.6330 - val_loss: 0.6182
Epoch 4/600
426/426 [==============================] - 0s 102us/sample - loss: 0.5982 - val_loss: 0.5789
Epoch 5/600
426/426 [==============================] - 0s 98us/sample - loss: 0.5523 - val_loss: 0.5269
Epoch 6/600
426/426 [==============================] - 0s 100us/sample - loss: 0.4953 - val_loss: 0.4664
Epoch 7/600
426/426 [==============================] - 0s 105us/sample - loss: 0.4343 - val_loss: 0.4037
Epoch 8/600
426/426 [==============================] - 0s 99us/sample - loss: 0.3751 - val_loss: 0.3478
Epoch 9/600
426/426 [==============================] - 0s 100us/sample - loss: 0.3233 - val_loss: 0.2980
Epoch 10/600
426/426 [==============================] - 0s 97us/sample - loss: 0.2816 - val_loss: 0.2608
Epoch 11/600
426/426 [==============================] - 0s 103us/sample - loss: 0.2524 - val_loss: 0.2375
Epoch 12/600
426/426 [==============================] - 0s 103us/sample - loss: 0.2286 - val_loss: 0.2125
Epoch 13/600
426/426 [==============================] - 0s 106us/sample - loss: 0.2103 - val_loss: 0.1996
Epoch 14/600
426/426 [==============================] - 0s 103us/sample - loss: 0.1936 - val_loss: 0.1834
Epoch 15/600
426/426 [==============================] - 0s 104us/sample - loss: 0.1809 - val_loss: 0.1737
Epoch 16/600
426/426 [==============================] - 0s 99us/sample - loss: 0.1718 - val_loss: 0.1635
Epoch 17/600
426/426 [==============================] - 0s 103us/sample - loss: 0.1605 - val_loss: 0.1568
Epoch 18/600
426/426 [==============================] - 0s 103us/sample - loss: 0.1509 - val_loss: 0.1558
Epoch 19/600
426/426 [==============================] - 0s 114us/sample - loss: 0.1446 - val_loss: 0.1447
Epoch 20/600
426/426 [==============================] - 0s 108us/sample - loss: 0.1373 - val_loss: 0.1404
Epoch 21/600
426/426 [==============================] - 0s 105us/sample - loss: 0.1297 - val_loss: 0.1401
Epoch 22/600
426/426 [==============================] - 0s 98us/sample - loss: 0.1232 - val_loss: 0.1344
Epoch 23/600
426/426 [==============================] - 0s 107us/sample - loss: 0.1174 - val_loss: 0.1309
Epoch 24/600
426/426 [==============================] - 0s 98us/sample - loss: 0.1106 - val_loss: 0.1277
Epoch 25/600
426/426 [==============================] - 0s 100us/sample - loss: 0.1049 - val_loss: 0.1309
Epoch 26/600
426/426 [==============================] - 0s 104us/sample - loss: 0.1007 - val_loss: 0.1214
Epoch 27/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0992 - val_loss: 0.1266
Epoch 28/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0937 - val_loss: 0.1182
Epoch 29/600
426/426 [==============================] - 0s 107us/sample - loss: 0.0900 - val_loss: 0.1222
Epoch 30/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0880 - val_loss: 0.1185
Epoch 31/600
426/426 [==============================] - 0s 106us/sample - loss: 0.0866 - val_loss: 0.1211
Epoch 32/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0862 - val_loss: 0.1171
Epoch 33/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0811 - val_loss: 0.1236
Epoch 34/600
426/426 [==============================] - 0s 104us/sample - loss: 0.0815 - val_loss: 0.1206
Epoch 35/600
426/426 [==============================] - 0s 112us/sample - loss: 0.0805 - val_loss: 0.1146
Epoch 36/600
426/426 [==============================] - 0s 111us/sample - loss: 0.0770 - val_loss: 0.1147
Epoch 37/600
426/426 [==============================] - 0s 95us/sample - loss: 0.0740 - val_loss: 0.1174
Epoch 38/600
426/426 [==============================] - 0s 100us/sample - loss: 0.0787 - val_loss: 0.1128
Epoch 39/600
426/426 [==============================] - 0s 102us/sample - loss: 0.0706 - val_loss: 0.1170
Epoch 40/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0692 - val_loss: 0.1213
Epoch 41/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0681 - val_loss: 0.1173
Epoch 42/600
426/426 [==============================] - 0s 105us/sample - loss: 0.0707 - val_loss: 0.1279
Epoch 43/600
426/426 [==============================] - 0s 107us/sample - loss: 0.0690 - val_loss: 0.1156
Epoch 44/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0663 - val_loss: 0.1169
Epoch 45/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0645 - val_loss: 0.1182
Epoch 46/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0664 - val_loss: 0.1147
Epoch 47/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0658 - val_loss: 0.1242
Epoch 48/600
426/426 [==============================] - 0s 108us/sample - loss: 0.0623 - val_loss: 0.1164
Epoch 49/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0625 - val_loss: 0.1181
Epoch 50/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0617 - val_loss: 0.1189
Epoch 51/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0615 - val_loss: 0.1212
Epoch 52/600
426/426 [==============================] - 0s 108us/sample - loss: 0.0608 - val_loss: 0.1195
Epoch 53/600
426/426 [==============================] - 0s 110us/sample - loss: 0.0596 - val_loss: 0.1162
Epoch 54/600
426/426 [==============================] - 0s 115us/sample - loss: 0.0605 - val_loss: 0.1177
Epoch 55/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0596 - val_loss: 0.1185
Epoch 56/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0582 - val_loss: 0.1202
Epoch 57/600
426/426 [==============================] - 0s 115us/sample - loss: 0.0575 - val_loss: 0.1188
Epoch 58/600
426/426 [==============================] - 0s 105us/sample - loss: 0.0590 - val_loss: 0.1166
Epoch 59/600
426/426 [==============================] - 0s 100us/sample - loss: 0.0638 - val_loss: 0.1247
Epoch 60/600
426/426 [==============================] - 0s 98us/sample - loss: 0.0566 - val_loss: 0.1205
Epoch 61/600
426/426 [==============================] - 0s 96us/sample - loss: 0.0563 - val_loss: 0.1234
Epoch 62/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0665 - val_loss: 0.1172
Epoch 63/600
426/426 [==============================] - 0s 94us/sample - loss: 0.0588 - val_loss: 0.1261
Epoch 00063: early stopping
Out[122]:
<tensorflow.python.keras.callbacks.History at 0x260b20a8508>
In [124]:
model_loss = pd.DataFrame(model.history.history)
model_loss.plot()
Out[124]:
<matplotlib.axes._subplots.AxesSubplot at 0x260b2923d08>

Example Three: Adding in DropOut Layers

In [125]:
from tensorflow.keras.layers import Dropout
In [126]:
model = Sequential()
model.add(Dense(units=30,activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(units=15,activation='relu'))
model.add(Dropout(0.5))

model.add(Dense(units=1,activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam')
In [127]:
model.fit(x=X_train, 
          y=y_train, 
          epochs=600,
          validation_data=(X_test, y_test), verbose=1,
          callbacks=[early_stop]
          )
Train on 426 samples, validate on 143 samples
Epoch 1/600
426/426 [==============================] - 0s 1ms/sample - loss: 0.6894 - val_loss: 0.6833
Epoch 2/600
426/426 [==============================] - 0s 101us/sample - loss: 0.6828 - val_loss: 0.6722
Epoch 3/600
426/426 [==============================] - 0s 103us/sample - loss: 0.6817 - val_loss: 0.6584
Epoch 4/600
426/426 [==============================] - 0s 99us/sample - loss: 0.6626 - val_loss: 0.6439
Epoch 5/600
426/426 [==============================] - 0s 97us/sample - loss: 0.6450 - val_loss: 0.6319
Epoch 6/600
426/426 [==============================] - 0s 101us/sample - loss: 0.6343 - val_loss: 0.6138
Epoch 7/600
426/426 [==============================] - 0s 108us/sample - loss: 0.6207 - val_loss: 0.5962
Epoch 8/600
426/426 [==============================] - 0s 103us/sample - loss: 0.6005 - val_loss: 0.5744
Epoch 9/600
426/426 [==============================] - 0s 103us/sample - loss: 0.5897 - val_loss: 0.5444
Epoch 10/600
426/426 [==============================] - 0s 103us/sample - loss: 0.5627 - val_loss: 0.5118
Epoch 11/600
426/426 [==============================] - 0s 105us/sample - loss: 0.5253 - val_loss: 0.4753
Epoch 12/600
426/426 [==============================] - 0s 103us/sample - loss: 0.5047 - val_loss: 0.4371
Epoch 13/600
426/426 [==============================] - 0s 117us/sample - loss: 0.5106 - val_loss: 0.4084
Epoch 14/600
426/426 [==============================] - 0s 112us/sample - loss: 0.4720 - val_loss: 0.3854
Epoch 15/600
426/426 [==============================] - 0s 108us/sample - loss: 0.4493 - val_loss: 0.3616
Epoch 16/600
426/426 [==============================] - 0s 110us/sample - loss: 0.4197 - val_loss: 0.3379
Epoch 17/600
426/426 [==============================] - 0s 101us/sample - loss: 0.4065 - val_loss: 0.3203
Epoch 18/600
426/426 [==============================] - 0s 110us/sample - loss: 0.4075 - val_loss: 0.2978
Epoch 19/600
426/426 [==============================] - 0s 98us/sample - loss: 0.3697 - val_loss: 0.2815
Epoch 20/600
426/426 [==============================] - 0s 100us/sample - loss: 0.3830 - val_loss: 0.2639
Epoch 21/600
426/426 [==============================] - 0s 105us/sample - loss: 0.3382 - val_loss: 0.2479
Epoch 22/600
426/426 [==============================] - 0s 100us/sample - loss: 0.3434 - val_loss: 0.2353
Epoch 23/600
426/426 [==============================] - 0s 101us/sample - loss: 0.3226 - val_loss: 0.2264
Epoch 24/600
426/426 [==============================] - 0s 103us/sample - loss: 0.3114 - val_loss: 0.2149
Epoch 25/600
426/426 [==============================] - 0s 103us/sample - loss: 0.3048 - val_loss: 0.2018
Epoch 26/600
426/426 [==============================] - 0s 103us/sample - loss: 0.2849 - val_loss: 0.1914
Epoch 27/600
426/426 [==============================] - 0s 112us/sample - loss: 0.3049 - val_loss: 0.1915
Epoch 28/600
426/426 [==============================] - 0s 103us/sample - loss: 0.2759 - val_loss: 0.1818
Epoch 29/600
426/426 [==============================] - 0s 105us/sample - loss: 0.2556 - val_loss: 0.1692
Epoch 30/600
426/426 [==============================] - 0s 105us/sample - loss: 0.2710 - val_loss: 0.1626
Epoch 31/600
426/426 [==============================] - 0s 103us/sample - loss: 0.2513 - val_loss: 0.1651
Epoch 32/600
426/426 [==============================] - 0s 108us/sample - loss: 0.2271 - val_loss: 0.1604
Epoch 33/600
426/426 [==============================] - 0s 101us/sample - loss: 0.2228 - val_loss: 0.1485
Epoch 34/600
426/426 [==============================] - 0s 103us/sample - loss: 0.2075 - val_loss: 0.1436
Epoch 35/600
426/426 [==============================] - 0s 106us/sample - loss: 0.2497 - val_loss: 0.1485
Epoch 36/600
426/426 [==============================] - 0s 110us/sample - loss: 0.2471 - val_loss: 0.1463
Epoch 37/600
426/426 [==============================] - 0s 103us/sample - loss: 0.2297 - val_loss: 0.1353
Epoch 38/600
426/426 [==============================] - 0s 105us/sample - loss: 0.2069 - val_loss: 0.1347
Epoch 39/600
426/426 [==============================] - 0s 101us/sample - loss: 0.2065 - val_loss: 0.1362
Epoch 40/600
426/426 [==============================] - 0s 105us/sample - loss: 0.2121 - val_loss: 0.1332
Epoch 41/600
426/426 [==============================] - 0s 103us/sample - loss: 0.2117 - val_loss: 0.1285
Epoch 42/600
426/426 [==============================] - 0s 103us/sample - loss: 0.2076 - val_loss: 0.1344
Epoch 43/600
426/426 [==============================] - 0s 103us/sample - loss: 0.2027 - val_loss: 0.1351
Epoch 44/600
426/426 [==============================] - 0s 112us/sample - loss: 0.1709 - val_loss: 0.1189
Epoch 45/600
426/426 [==============================] - 0s 109us/sample - loss: 0.1620 - val_loss: 0.1149
Epoch 46/600
426/426 [==============================] - 0s 117us/sample - loss: 0.1735 - val_loss: 0.1234
Epoch 47/600
426/426 [==============================] - 0s 115us/sample - loss: 0.1710 - val_loss: 0.1131
Epoch 48/600
426/426 [==============================] - 0s 105us/sample - loss: 0.1628 - val_loss: 0.1112
Epoch 49/600
426/426 [==============================] - 0s 103us/sample - loss: 0.1824 - val_loss: 0.1270
Epoch 50/600
426/426 [==============================] - 0s 106us/sample - loss: 0.1905 - val_loss: 0.1101
Epoch 51/600
426/426 [==============================] - 0s 110us/sample - loss: 0.1841 - val_loss: 0.1075
Epoch 52/600
426/426 [==============================] - 0s 119us/sample - loss: 0.1650 - val_loss: 0.1062
Epoch 53/600
426/426 [==============================] - 0s 103us/sample - loss: 0.1576 - val_loss: 0.1089
Epoch 54/600
426/426 [==============================] - 0s 101us/sample - loss: 0.1636 - val_loss: 0.1059
Epoch 55/600
426/426 [==============================] - 0s 97us/sample - loss: 0.1857 - val_loss: 0.1050
Epoch 56/600
426/426 [==============================] - 0s 96us/sample - loss: 0.1671 - val_loss: 0.1035
Epoch 57/600
426/426 [==============================] - 0s 100us/sample - loss: 0.1833 - val_loss: 0.1034
Epoch 58/600
426/426 [==============================] - 0s 102us/sample - loss: 0.1444 - val_loss: 0.1045
Epoch 59/600
426/426 [==============================] - 0s 102us/sample - loss: 0.1601 - val_loss: 0.1026
Epoch 60/600
426/426 [==============================] - 0s 97us/sample - loss: 0.1622 - val_loss: 0.1004
Epoch 61/600
426/426 [==============================] - 0s 97us/sample - loss: 0.1687 - val_loss: 0.1115
Epoch 62/600
426/426 [==============================] - 0s 100us/sample - loss: 0.1508 - val_loss: 0.1101
Epoch 63/600
426/426 [==============================] - 0s 100us/sample - loss: 0.1349 - val_loss: 0.1008
Epoch 64/600
426/426 [==============================] - 0s 97us/sample - loss: 0.1498 - val_loss: 0.0994
Epoch 65/600
426/426 [==============================] - 0s 99us/sample - loss: 0.1401 - val_loss: 0.0989
Epoch 66/600
426/426 [==============================] - 0s 97us/sample - loss: 0.1494 - val_loss: 0.1005
Epoch 67/600
426/426 [==============================] - 0s 98us/sample - loss: 0.1411 - val_loss: 0.1055
Epoch 68/600
426/426 [==============================] - 0s 99us/sample - loss: 0.1453 - val_loss: 0.0959
Epoch 69/600
426/426 [==============================] - 0s 101us/sample - loss: 0.1608 - val_loss: 0.0978
Epoch 70/600
426/426 [==============================] - 0s 103us/sample - loss: 0.1537 - val_loss: 0.1042
Epoch 71/600
426/426 [==============================] - 0s 102us/sample - loss: 0.1324 - val_loss: 0.1009
Epoch 72/600
426/426 [==============================] - 0s 101us/sample - loss: 0.1238 - val_loss: 0.1012
Epoch 73/600
426/426 [==============================] - 0s 108us/sample - loss: 0.1334 - val_loss: 0.0958
Epoch 74/600
426/426 [==============================] - 0s 113us/sample - loss: 0.1238 - val_loss: 0.0956
Epoch 75/600
426/426 [==============================] - 0s 101us/sample - loss: 0.1210 - val_loss: 0.1081
Epoch 76/600
426/426 [==============================] - 0s 98us/sample - loss: 0.1400 - val_loss: 0.1109
Epoch 77/600
426/426 [==============================] - 0s 103us/sample - loss: 0.1371 - val_loss: 0.0992
Epoch 78/600
426/426 [==============================] - 0s 98us/sample - loss: 0.1228 - val_loss: 0.1019
Epoch 79/600
426/426 [==============================] - 0s 103us/sample - loss: 0.1097 - val_loss: 0.0958
Epoch 80/600
426/426 [==============================] - 0s 104us/sample - loss: 0.1086 - val_loss: 0.0939
Epoch 81/600
426/426 [==============================] - 0s 102us/sample - loss: 0.1100 - val_loss: 0.0974
Epoch 82/600
426/426 [==============================] - 0s 107us/sample - loss: 0.1394 - val_loss: 0.0995
Epoch 83/600
426/426 [==============================] - 0s 110us/sample - loss: 0.1069 - val_loss: 0.0951
Epoch 84/600
426/426 [==============================] - 0s 108us/sample - loss: 0.1095 - val_loss: 0.0961
Epoch 85/600
426/426 [==============================] - 0s 108us/sample - loss: 0.1262 - val_loss: 0.0995
Epoch 86/600
426/426 [==============================] - 0s 105us/sample - loss: 0.1187 - val_loss: 0.1073
Epoch 87/600
426/426 [==============================] - 0s 98us/sample - loss: 0.1299 - val_loss: 0.1042
Epoch 88/600
426/426 [==============================] - 0s 100us/sample - loss: 0.1254 - val_loss: 0.0914
Epoch 89/600
426/426 [==============================] - 0s 101us/sample - loss: 0.1074 - val_loss: 0.0981
Epoch 90/600
426/426 [==============================] - 0s 100us/sample - loss: 0.1012 - val_loss: 0.0926
Epoch 91/600
426/426 [==============================] - 0s 101us/sample - loss: 0.1200 - val_loss: 0.0911
Epoch 92/600
426/426 [==============================] - 0s 103us/sample - loss: 0.1301 - val_loss: 0.0937
Epoch 93/600
426/426 [==============================] - 0s 98us/sample - loss: 0.1004 - val_loss: 0.0917
Epoch 94/600
426/426 [==============================] - 0s 100us/sample - loss: 0.0971 - val_loss: 0.1013
Epoch 95/600
426/426 [==============================] - 0s 108us/sample - loss: 0.1240 - val_loss: 0.0910
Epoch 96/600
426/426 [==============================] - 0s 112us/sample - loss: 0.0974 - val_loss: 0.0979
Epoch 97/600
426/426 [==============================] - 0s 112us/sample - loss: 0.1092 - val_loss: 0.0995
Epoch 98/600
426/426 [==============================] - 0s 108us/sample - loss: 0.1039 - val_loss: 0.0935
Epoch 99/600
426/426 [==============================] - 0s 103us/sample - loss: 0.1138 - val_loss: 0.0942
Epoch 100/600
426/426 [==============================] - 0s 110us/sample - loss: 0.1013 - val_loss: 0.0886
Epoch 101/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0931 - val_loss: 0.0962
Epoch 102/600
426/426 [==============================] - 0s 114us/sample - loss: 0.1034 - val_loss: 0.1038
Epoch 103/600
426/426 [==============================] - 0s 103us/sample - loss: 0.1084 - val_loss: 0.0904
Epoch 104/600
426/426 [==============================] - 0s 102us/sample - loss: 0.1028 - val_loss: 0.0936
Epoch 105/600
426/426 [==============================] - 0s 98us/sample - loss: 0.1086 - val_loss: 0.1150
Epoch 106/600
426/426 [==============================] - 0s 100us/sample - loss: 0.0957 - val_loss: 0.1001
Epoch 107/600
426/426 [==============================] - 0s 102us/sample - loss: 0.1069 - val_loss: 0.0919
Epoch 108/600
426/426 [==============================] - 0s 103us/sample - loss: 0.1133 - val_loss: 0.0909
Epoch 109/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0913 - val_loss: 0.0921
Epoch 110/600
426/426 [==============================] - 0s 102us/sample - loss: 0.1035 - val_loss: 0.0972
Epoch 111/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0948 - val_loss: 0.0918
Epoch 112/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0935 - val_loss: 0.0879
Epoch 113/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0926 - val_loss: 0.0895
Epoch 114/600
426/426 [==============================] - 0s 103us/sample - loss: 0.1118 - val_loss: 0.0948
Epoch 115/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0988 - val_loss: 0.0949
Epoch 116/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0989 - val_loss: 0.0998
Epoch 117/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0862 - val_loss: 0.0958
Epoch 118/600
426/426 [==============================] - 0s 106us/sample - loss: 0.0874 - val_loss: 0.1016
Epoch 119/600
426/426 [==============================] - 0s 114us/sample - loss: 0.0872 - val_loss: 0.0903
Epoch 120/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0837 - val_loss: 0.0891
Epoch 121/600
426/426 [==============================] - 0s 102us/sample - loss: 0.0792 - val_loss: 0.0958
Epoch 122/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0897 - val_loss: 0.0931
Epoch 123/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0768 - val_loss: 0.1000
Epoch 124/600
426/426 [==============================] - 0s 103us/sample - loss: 0.1034 - val_loss: 0.1016
Epoch 125/600
426/426 [==============================] - 0s 105us/sample - loss: 0.0951 - val_loss: 0.0962
Epoch 126/600
426/426 [==============================] - 0s 112us/sample - loss: 0.1027 - val_loss: 0.0997
Epoch 127/600
426/426 [==============================] - 0s 105us/sample - loss: 0.1118 - val_loss: 0.0951
Epoch 128/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0911 - val_loss: 0.0979
Epoch 129/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0955 - val_loss: 0.0954
Epoch 130/600
426/426 [==============================] - 0s 105us/sample - loss: 0.1043 - val_loss: 0.0945
Epoch 131/600
426/426 [==============================] - 0s 99us/sample - loss: 0.0746 - val_loss: 0.0893
Epoch 132/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0892 - val_loss: 0.0981
Epoch 133/600
426/426 [==============================] - 0s 103us/sample - loss: 0.0792 - val_loss: 0.0917
Epoch 134/600
426/426 [==============================] - 0s 101us/sample - loss: 0.0745 - val_loss: 0.0945
Epoch 135/600
426/426 [==============================] - 0s 105us/sample - loss: 0.1174 - val_loss: 0.0962
Epoch 136/600
426/426 [==============================] - 0s 99us/sample - loss: 0.1183 - val_loss: 0.0955
Epoch 137/600
426/426 [==============================] - 0s 104us/sample - loss: 0.0861 - val_loss: 0.0970
Epoch 00137: early stopping
Out[127]:
<tensorflow.python.keras.callbacks.History at 0x260b3c18288>
In [128]:
model_loss = pd.DataFrame(model.history.history)
model_loss.plot()
Out[128]:
<matplotlib.axes._subplots.AxesSubplot at 0x260b4590288>

Model Evaluation

In [129]:
predictions = model.predict_classes(X_test)
In [130]:
from sklearn.metrics import classification_report,confusion_matrix
In [131]:
# https://en.wikipedia.org/wiki/Precision_and_recall
print(classification_report(y_test,predictions))
              precision    recall  f1-score   support

           0       0.96      0.98      0.97        55
           1       0.99      0.98      0.98        88

    accuracy                           0.98       143
   macro avg       0.98      0.98      0.98       143
weighted avg       0.98      0.98      0.98       143

In [133]:
print(confusion_matrix(y_test,predictions))
[[54  1]
 [ 2 86]]
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [1]:
n = int(input())
sv = list(map(int, input().split()))
mc = list(map(int, input().split()))

sv.sort()
mc.sort()

i = 0
while i < n:
    if mc[i] < sv[i]:
        i += 1
        continue
    else:
        print('NO')
        break
else:
    print('YES')
5
123 146 454 542 456
100 328 248 689 200
NO
In [5]:
try:
    s = int(input())
    d = int(input())

    mn, mx = (10**(d-1)), (10**d - 1)

    def sm(n):
        r = 0
        while n:
            r, n = r + n % 10, n // 10
        return r

    for i in range(mn, mx+1):
        if sm(i) == s:
            print('Smallest number is',i)
            break
    else:
        print('Not possible')

except:
    print('Not possible')
9
2
Smallest number is 18
In [10]:
n = int(input())
d = list(map(int, input().split()))

def checkt(degree, n):
    smd = sum(degree)
    if (2*(n-1) == smd):
        return True
    else:
        return False
    
if checkt(d):
    print('Yes')
else:
    print('No')
6
In [2]:
#!/usr/bin/env python
# coding: utf-8

# In[103]:


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import find_peaks
import seaborn as sns
get_ipython().run_line_magic('matplotlib', 'inline')


# In[104]:


df=pd.read_csv('1 (2).csv')


# In[105]:


df.head()


# In[106]:


x=df['Time'][1:]
y=df['Average(A)'][1:]


# In[107]:


x.dtype


# In[108]:


y.dtype


# In[109]:


x = x.astype(float, errors = 'raise')
y = y.astype(float, errors = 'raise')


# In[110]:


sns.lineplot(x,y)


# In[111]:


peaks= find_peaks(y, height = 1,prominence=3, distance = 1)
height = peaks[1]['peak_heights'] #list of the heights of the peaks
peak_pos = x[peaks[0]] #list of the peaks positions


# In[112]:


print(height)


# In[113]:


print(peaks[0])


# In[114]:


print(peak_pos)


# In[118]:


plt.plot(peaks[0], y[peaks[0]], "xr")
plt.plot(y)
# plt.legend(['prominence'])


# In[119]:


from astropy import modeling


# In[147]:


fitter = modeling.fitting.LevMarLSQFitter()
model = modeling.models.Gaussian1D(amplitude=200, mean=0.026846, stddev=0.783768)   
# depending on the data you need to give some initial values
fitted_model = fitter(model, x, y)


# In[148]:


plt.plot(x, y)
plt.plot(x, fitted_model(x))


# In[ ]:
[1.874792 1.967807 1.919506 1.988502 1.9861   1.91505  1.994727 1.812605
 1.991404 1.990032 1.446508 1.924321 1.431366 1.587399 1.284044 1.888242
 1.650057 1.599253 1.432561 1.857474 1.682781 1.52087  1.984167 1.968706
 1.955412 1.96801  1.280957 1.923685 1.871919 1.545619 1.808725 1.663648
 1.3478   1.411324]
[  1697   2691   3593   5226   6261   7199   8551   9524  10532  11545
  14521  15582  47481  57708  58624  67907  68826  78152  79067  88376
  89296  93854  94889  95850  96772  97662 101943 102866 103782 104716
 105636 106544 115851 116777]
1697        6.783700
2691       10.759700
3593       14.367700
5226       20.899700
6261       25.039700
7199       28.791700
8551       34.199700
9524       38.091699
10532      42.123699
11545      46.175699
14521      58.079699
15582      62.323699
47481     189.919695
57708     230.827694
58624     234.491694
67907     271.623693
68826     275.299693
78152     312.603692
79067     316.263691
88376     353.499690
89296     357.179690
93854     375.411690
94889     379.551690
95850     383.395690
96772     387.083689
97662     390.643689
101943    407.767689
102866    411.459689
103782    415.123689
104716    418.859689
105636    422.539689
106544    426.171688
115851    463.399687
116777    467.103687
Name: Time, dtype: float64
Out[2]:
[<matplotlib.lines.Line2D at 0x29d5c6b8e20>]
In [ ]:
 
In [ ]:
 
In [ ]: